1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
  27  * more detailed discussion of the overall mpxio architecture.
  28  *
 * Default locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex, mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex, mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
  38  */
  39 
  40 #include <sys/note.h>
  41 #include <sys/types.h>
  42 #include <sys/varargs.h>
  43 #include <sys/param.h>
  44 #include <sys/errno.h>
  45 #include <sys/uio.h>
  46 #include <sys/buf.h>
  47 #include <sys/modctl.h>
  48 #include <sys/open.h>
  49 #include <sys/kmem.h>
  50 #include <sys/poll.h>
  51 #include <sys/conf.h>
  52 #include <sys/bootconf.h>
  53 #include <sys/cmn_err.h>
  54 #include <sys/stat.h>
  55 #include <sys/ddi.h>
  56 #include <sys/sunddi.h>
  57 #include <sys/ddipropdefs.h>
  58 #include <sys/sunndi.h>
  59 #include <sys/ndi_impldefs.h>
  60 #include <sys/promif.h>
  61 #include <sys/sunmdi.h>
  62 #include <sys/mdi_impldefs.h>
  63 #include <sys/taskq.h>
  64 #include <sys/epm.h>
  65 #include <sys/sunpm.h>
  66 #include <sys/modhash.h>
  67 #include <sys/disp.h>
  68 #include <sys/autoconf.h>
  69 #include <sys/sysmacros.h>
  70 
  71 #ifdef  DEBUG
  72 #include <sys/debug.h>
  73 int     mdi_debug = 1;
  74 int     mdi_debug_logonly = 0;
  75 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))      i_mdi_log pargs
  76 #define MDI_WARN        CE_WARN, __func__
  77 #define MDI_NOTE        CE_NOTE, __func__
  78 #define MDI_CONT        CE_CONT, __func__
  79 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
  80 #else   /* !DEBUG */
  81 #define MDI_DEBUG(dbglevel, pargs)
  82 #endif  /* DEBUG */
  83 int     mdi_debug_consoleonly = 0;
  84 int     mdi_delay = 3;
  85 
/* Defined outside this file; minclsyspri is used below as MDI_TASKQ_PRI. */
extern pri_t    minclsyspri;
extern int      modrootloaded;

/*
 * Global mutex:
 * Protects vHCI list and structure members.
 * Initialized by i_mdi_init() and held while the registered vHCI list is
 * walked or modified.
 */
kmutex_t        mdi_mutex;

/*
 * Registered vHCI class driver lists
 * Singly linked through vh_next; new vHCIs are appended at the tail by
 * mdi_vhci_register() and unlinked by mdi_vhci_unregister(), which keep
 * mdi_vhci_count in step.
 */
int             mdi_vhci_count;
mdi_vhci_t      *mdi_vhci_head;
mdi_vhci_t      *mdi_vhci_tail;
 101 
/*
 * Client Hash Table size
 * Number of struct client_hash entries allocated for each vHCI's
 * vh_client_table in mdi_vhci_register().
 */
static int      mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 * These are the arguments i_mdi_init() passes to taskq_create(); the
 * MINALLOC/MAXALLOC values scale with mdi_taskq_n_threads.
 */
#define MDI_TASKQ_N_THREADS     8
#define MDI_TASKQ_PRI           minclsyspri
#define MDI_TASKQ_MINALLOC      (4*mdi_taskq_n_threads)
#define MDI_TASKQ_MAXALLOC      (500*mdi_taskq_n_threads)

/* The MDI taskq, created once by i_mdi_init() */
taskq_t                         *mdi_taskq;
static uint_t                   mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* drv_usectohz() of 1000000 microseconds, i.e. of one second */
#define TICKS_PER_SECOND        (drv_usectohz(1000000))
 119 
/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *      Number of times path discovery will be attempted during early boot.
 *      Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *      Number of times path discovery will be attempted after early boot.
 *      Set it to a minimum of two to allow for discovery of iscsi paths which
 *      may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *      Minimum number of seconds MDI will wait between successive discovery
 *      of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;         /* attempts */
static int mdi_path_discovery_postboot = 2;     /* attempts */
static int mdi_path_discovery_interval = 10;    /* seconds */

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/*
 * NOTE(review): going by the name, the size of the hash used for the bus
 * config cache; its consumer is not visible in this part of the file, so
 * confirm before relying on that.
 */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;
 160 
/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 *
 * The mutex and the three hashes below are set up once by i_mdi_init();
 * each hash is created with mdi_pathmap_hash_size as its size argument.
 */
static uint_t           mdi_pathmap_instance = 1;       /* 0 -> any path */
static int              mdi_pathmap_hash_size = 256;
static kmutex_t         mdi_pathmap_mutex;
static mod_hash_t       *mdi_pathmap_bypath;            /* "path"->instance */
static mod_hash_t       *mdi_pathmap_byinstance;        /* instance->"path" */
static mod_hash_t       *mdi_pathmap_sbyinstance;       /* inst->shortpath */
 174 
/*
 * MDI component property name/value string definitions
 */
const char              *mdi_component_prop = "mpxio-component";
const char              *mdi_component_prop_vhci = "vhci";
const char              *mdi_component_prop_phci = "phci";
const char              *mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char              *mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char              *mdi_load_balance = "load-balance";
const char              *mdi_load_balance_none = "none";
const char              *mdi_load_balance_rr = "round-robin";
const char              *mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * cmn_err() format used by mdi_vhci_register() when a second vHCI driver
 * tries to register for a class that already has one; %s is the class name.
 */
static char vhci_greeting[] =
        "\tThere already exists one vHCI driver for class %s\n"
        "\tOnly one vHCI driver for each class is allowed\n";
 204 
 205 /*
 206  * Static function prototypes
 207  */
 208 static int              i_mdi_phci_offline(dev_info_t *, uint_t);
 209 static int              i_mdi_client_offline(dev_info_t *, uint_t);
 210 static int              i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
 211 static void             i_mdi_phci_post_detach(dev_info_t *,
 212                             ddi_detach_cmd_t, int);
 213 static int              i_mdi_client_pre_detach(dev_info_t *,
 214                             ddi_detach_cmd_t);
 215 static void             i_mdi_client_post_detach(dev_info_t *,
 216                             ddi_detach_cmd_t, int);
 217 static void             i_mdi_pm_hold_pip(mdi_pathinfo_t *);
 218 static void             i_mdi_pm_rele_pip(mdi_pathinfo_t *);
 219 static int              i_mdi_lba_lb(mdi_client_t *ct,
 220                             mdi_pathinfo_t **ret_pip, struct buf *buf);
 221 static void             i_mdi_pm_hold_client(mdi_client_t *, int);
 222 static void             i_mdi_pm_rele_client(mdi_client_t *, int);
 223 static void             i_mdi_pm_reset_client(mdi_client_t *);
 224 static int              i_mdi_power_all_phci(mdi_client_t *);
 225 static void             i_mdi_log_sysevent(dev_info_t *, char *, char *);
 226 
 227 
 228 /*
 229  * Internal mdi_pathinfo node functions
 230  */
 231 static void             i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
 232 
 233 static mdi_vhci_t       *i_mdi_vhci_class2vhci(char *);
 234 static mdi_vhci_t       *i_devi_get_vhci(dev_info_t *);
 235 static mdi_phci_t       *i_devi_get_phci(dev_info_t *);
 236 static void             i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
 237 static void             i_mdi_phci_unlock(mdi_phci_t *);
 238 static mdi_pathinfo_t   *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
 239 static void             i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
 240 static void             i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
 241 static void             i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
 242                             mdi_client_t *);
 243 static void             i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
 244 static void             i_mdi_client_remove_path(mdi_client_t *,
 245                             mdi_pathinfo_t *);
 246 
 247 static int              i_mdi_pi_state_change(mdi_pathinfo_t *,
 248                             mdi_pathinfo_state_t, int);
 249 static int              i_mdi_pi_offline(mdi_pathinfo_t *, int);
 250 static dev_info_t       *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
 251                             char **, int);
 252 static dev_info_t       *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
 253 static int              i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
 254 static int              i_mdi_is_child_present(dev_info_t *, dev_info_t *);
 255 static mdi_client_t     *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
 256 static void             i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
 257 static void             i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
 258 static mdi_client_t     *i_mdi_client_find(mdi_vhci_t *, char *, char *);
 259 static void             i_mdi_client_update_state(mdi_client_t *);
 260 static int              i_mdi_client_compute_state(mdi_client_t *,
 261                             mdi_phci_t *);
 262 static void             i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
 263 static void             i_mdi_client_unlock(mdi_client_t *);
 264 static int              i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
 265 static mdi_client_t     *i_devi_get_client(dev_info_t *);
 266 /*
 267  * NOTE: this will be removed once the NWS files are changed to use the new
 268  * mdi_{enable,disable}_path interfaces
 269  */
 270 static int              i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
 271                                 int, int);
 272 static mdi_pathinfo_t   *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
 273                                 mdi_vhci_t *vh, int flags, int op);
 274 /*
 275  * Failover related function prototypes
 276  */
 277 static int              i_mdi_failover(void *);
 278 
 279 /*
 280  * misc internal functions
 281  */
 282 static int              i_mdi_get_hash_key(char *);
 283 static int              i_map_nvlist_error_to_mdi(int);
 284 static void             i_mdi_report_path_state(mdi_client_t *,
 285                             mdi_pathinfo_t *);
 286 
 287 static void             setup_vhci_cache(mdi_vhci_t *);
 288 static int              destroy_vhci_cache(mdi_vhci_t *);
 289 static int              stop_vhcache_async_threads(mdi_vhci_config_t *);
 290 static boolean_t        stop_vhcache_flush_thread(void *, int);
 291 static void             free_string_array(char **, int);
 292 static void             free_vhcache_phci(mdi_vhcache_phci_t *);
 293 static void             free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
 294 static void             free_vhcache_client(mdi_vhcache_client_t *);
 295 static int              mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
 296 static nvlist_t         *vhcache_to_mainnvl(mdi_vhci_cache_t *);
 297 static void             vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
 298 static void             vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
 299 static void             vhcache_pi_add(mdi_vhci_config_t *,
 300                             struct mdi_pathinfo *);
 301 static void             vhcache_pi_remove(mdi_vhci_config_t *,
 302                             struct mdi_pathinfo *);
 303 static void             free_phclient_path_list(mdi_phys_path_t *);
 304 static void             sort_vhcache_paths(mdi_vhcache_client_t *);
 305 static int              flush_vhcache(mdi_vhci_config_t *, int);
 306 static void             vhcache_dirty(mdi_vhci_config_t *);
 307 static void             free_async_client_config(mdi_async_client_config_t *);
 308 static void             single_threaded_vhconfig_enter(mdi_vhci_config_t *);
 309 static void             single_threaded_vhconfig_exit(mdi_vhci_config_t *);
 310 static nvlist_t         *read_on_disk_vhci_cache(char *);
 311 extern int              fread_nvlist(char *, nvlist_t **);
 312 extern int              fwrite_nvlist(char *, nvlist_t *);
 313 
 314 /* called once when first vhci registers with mdi */
 315 static void
 316 i_mdi_init()
 317 {
 318         static int initialized = 0;
 319 
 320         if (initialized)
 321                 return;
 322         initialized = 1;
 323 
 324         mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
 325 
 326         /* Create our taskq resources */
 327         mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
 328             MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
 329             TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
 330         ASSERT(mdi_taskq != NULL);      /* taskq_create never fails */
 331 
 332         /* Allocate ['path_instance' <-> "path"] maps */
 333         mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
 334         mdi_pathmap_bypath = mod_hash_create_strhash(
 335             "mdi_pathmap_bypath", mdi_pathmap_hash_size,
 336             mod_hash_null_valdtor);
 337         mdi_pathmap_byinstance = mod_hash_create_idhash(
 338             "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
 339             mod_hash_null_valdtor);
 340         mdi_pathmap_sbyinstance = mod_hash_create_idhash(
 341             "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
 342             mod_hash_null_valdtor);
 343 }
 344 
 345 /*
 346  * mdi_get_component_type():
 347  *              Return mpxio component type
 348  * Return Values:
 349  *              MDI_COMPONENT_NONE
 350  *              MDI_COMPONENT_VHCI
 351  *              MDI_COMPONENT_PHCI
 352  *              MDI_COMPONENT_CLIENT
 353  * XXX This doesn't work under multi-level MPxIO and should be
 354  *      removed when clients migrate mdi_component_is_*() interfaces.
 355  */
 356 int
 357 mdi_get_component_type(dev_info_t *dip)
 358 {
 359         return (DEVI(dip)->devi_mdi_component);
 360 }
 361 
 362 /*
 363  * mdi_vhci_register():
 364  *              Register a vHCI module with the mpxio framework
 365  *              mdi_vhci_register() is called by vHCI drivers to register the
 366  *              'class_driver' vHCI driver and its MDI entrypoints with the
 367  *              mpxio framework.  The vHCI driver must call this interface as
 368  *              part of its attach(9e) handler.
 369  *              Competing threads may try to attach mdi_vhci_register() as
 370  *              the vHCI drivers are loaded and attached as a result of pHCI
 371  *              driver instance registration (mdi_phci_register()) with the
 372  *              framework.
 373  * Return Values:
 374  *              MDI_SUCCESS
 375  *              MDI_FAILURE
 376  */
 377 /*ARGSUSED*/
 378 int
 379 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
 380     int flags)
 381 {
 382         mdi_vhci_t              *vh = NULL;
 383 
 384         /* Registrant can't be older */
 385         ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
 386 
 387 #ifdef DEBUG
 388         /*
 389          * IB nexus driver is loaded only when IB hardware is present.
 390          * In order to be able to do this there is a need to drive the loading
 391          * and attaching of the IB nexus driver (especially when an IB hardware
 392          * is dynamically plugged in) when an IB HCA driver (PHCI)
 393          * is being attached. Unfortunately this gets into the limitations
 394          * of devfs as there seems to be no clean way to drive configuration
 395          * of a subtree from another subtree of a devfs. Hence, do not ASSERT
 396          * for IB.
 397          */
 398         if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
 399                 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 400 #endif
 401 
 402         i_mdi_init();
 403 
 404         mutex_enter(&mdi_mutex);
 405         /*
 406          * Scan for already registered vhci
 407          */
 408         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 409                 if (strcmp(vh->vh_class, class) == 0) {
 410                         /*
 411                          * vHCI has already been created.  Check for valid
 412                          * vHCI ops registration.  We only support one vHCI
 413                          * module per class
 414                          */
 415                         if (vh->vh_ops != NULL) {
 416                                 mutex_exit(&mdi_mutex);
 417                                 cmn_err(CE_NOTE, vhci_greeting, class);
 418                                 return (MDI_FAILURE);
 419                         }
 420                         break;
 421                 }
 422         }
 423 
 424         /*
 425          * if not yet created, create the vHCI component
 426          */
 427         if (vh == NULL) {
 428                 struct client_hash      *hash = NULL;
 429                 char                    *load_balance;
 430 
 431                 /*
 432                  * Allocate and initialize the mdi extensions
 433                  */
 434                 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
 435                 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
 436                     KM_SLEEP);
 437                 vh->vh_client_table = hash;
 438                 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
 439                 (void) strcpy(vh->vh_class, class);
 440                 vh->vh_lb = LOAD_BALANCE_RR;
 441                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
 442                     0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
 443                         if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
 444                                 vh->vh_lb = LOAD_BALANCE_NONE;
 445                         } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
 446                                     == 0) {
 447                                 vh->vh_lb = LOAD_BALANCE_LBA;
 448                         }
 449                         ddi_prop_free(load_balance);
 450                 }
 451 
 452                 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
 453                 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
 454 
 455                 /*
 456                  * Store the vHCI ops vectors
 457                  */
 458                 vh->vh_dip = vdip;
 459                 vh->vh_ops = vops;
 460 
 461                 setup_vhci_cache(vh);
 462 
 463                 if (mdi_vhci_head == NULL) {
 464                         mdi_vhci_head = vh;
 465                 }
 466                 if (mdi_vhci_tail) {
 467                         mdi_vhci_tail->vh_next = vh;
 468                 }
 469                 mdi_vhci_tail = vh;
 470                 mdi_vhci_count++;
 471         }
 472 
 473         /*
 474          * Claim the devfs node as a vhci component
 475          */
 476         DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
 477 
 478         /*
 479          * Initialize our back reference from dev_info node
 480          */
 481         DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
 482         mutex_exit(&mdi_mutex);
 483         return (MDI_SUCCESS);
 484 }
 485 
 486 /*
 487  * mdi_vhci_unregister():
 488  *              Unregister a vHCI module from mpxio framework
 489  *              mdi_vhci_unregister() is called from the detach(9E) entrypoint
 490  *              of a vhci to unregister it from the framework.
 491  * Return Values:
 492  *              MDI_SUCCESS
 493  *              MDI_FAILURE
 494  */
 495 /*ARGSUSED*/
 496 int
 497 mdi_vhci_unregister(dev_info_t *vdip, int flags)
 498 {
 499         mdi_vhci_t      *found, *vh, *prev = NULL;
 500 
 501         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 502 
 503         /*
 504          * Check for invalid VHCI
 505          */
 506         if ((vh = i_devi_get_vhci(vdip)) == NULL)
 507                 return (MDI_FAILURE);
 508 
 509         /*
 510          * Scan the list of registered vHCIs for a match
 511          */
 512         mutex_enter(&mdi_mutex);
 513         for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
 514                 if (found == vh)
 515                         break;
 516                 prev = found;
 517         }
 518 
 519         if (found == NULL) {
 520                 mutex_exit(&mdi_mutex);
 521                 return (MDI_FAILURE);
 522         }
 523 
 524         /*
 525          * Check the vHCI, pHCI and client count. All the pHCIs and clients
 526          * should have been unregistered, before a vHCI can be
 527          * unregistered.
 528          */
 529         MDI_VHCI_PHCI_LOCK(vh);
 530         if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
 531                 MDI_VHCI_PHCI_UNLOCK(vh);
 532                 mutex_exit(&mdi_mutex);
 533                 return (MDI_FAILURE);
 534         }
 535         MDI_VHCI_PHCI_UNLOCK(vh);
 536 
 537         if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
 538                 mutex_exit(&mdi_mutex);
 539                 return (MDI_FAILURE);
 540         }
 541 
 542         /*
 543          * Remove the vHCI from the global list
 544          */
 545         if (vh == mdi_vhci_head) {
 546                 mdi_vhci_head = vh->vh_next;
 547         } else {
 548                 prev->vh_next = vh->vh_next;
 549         }
 550         if (vh == mdi_vhci_tail) {
 551                 mdi_vhci_tail = prev;
 552         }
 553         mdi_vhci_count--;
 554         mutex_exit(&mdi_mutex);
 555 
 556         vh->vh_ops = NULL;
 557         DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
 558         DEVI(vdip)->devi_mdi_xhci = NULL;
 559         kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
 560         kmem_free(vh->vh_client_table,
 561             mdi_client_table_size * sizeof (struct client_hash));
 562         mutex_destroy(&vh->vh_phci_mutex);
 563         mutex_destroy(&vh->vh_client_mutex);
 564 
 565         kmem_free(vh, sizeof (mdi_vhci_t));
 566         return (MDI_SUCCESS);
 567 }
 568 
 569 /*
 570  * i_mdi_vhci_class2vhci():
 571  *              Look for a matching vHCI module given a vHCI class name
 572  * Return Values:
 573  *              Handle to a vHCI component
 574  *              NULL
 575  */
 576 static mdi_vhci_t *
 577 i_mdi_vhci_class2vhci(char *class)
 578 {
 579         mdi_vhci_t      *vh = NULL;
 580 
 581         ASSERT(!MUTEX_HELD(&mdi_mutex));
 582 
 583         mutex_enter(&mdi_mutex);
 584         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 585                 if (strcmp(vh->vh_class, class) == 0) {
 586                         break;
 587                 }
 588         }
 589         mutex_exit(&mdi_mutex);
 590         return (vh);
 591 }
 592 
 593 /*
 594  * i_devi_get_vhci():
 595  *              Utility function to get the handle to a vHCI component
 596  * Return Values:
 597  *              Handle to a vHCI component
 598  *              NULL
 599  */
 600 mdi_vhci_t *
 601 i_devi_get_vhci(dev_info_t *vdip)
 602 {
 603         mdi_vhci_t      *vh = NULL;
 604         if (MDI_VHCI(vdip)) {
 605                 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
 606         }
 607         return (vh);
 608 }
 609 
 610 /*
 611  * mdi_phci_register():
 612  *              Register a pHCI module with mpxio framework
 613  *              mdi_phci_register() is called by pHCI drivers to register with
 614  *              the mpxio framework and a specific 'class_driver' vHCI.  The
 615  *              pHCI driver must call this interface as part of its attach(9e)
 616  *              handler.
 617  * Return Values:
 618  *              MDI_SUCCESS
 619  *              MDI_FAILURE
 620  */
 621 /*ARGSUSED*/
 622 int
 623 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
 624 {
 625         mdi_phci_t              *ph;
 626         mdi_vhci_t              *vh;
 627         char                    *data;
 628 
 629         /*
 630          * Some subsystems, like fcp, perform pHCI registration from a
 631          * different thread than the one doing the pHCI attach(9E) - the
 632          * driver attach code is waiting for this other thread to complete.
 633          * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
 634          * (indicating that some thread has done an ndi_devi_enter of parent)
 635          * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
 636          */
 637         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 638 
 639         /*
 640          * Check for mpxio-disable property. Enable mpxio if the property is
 641          * missing or not set to "yes".
 642          * If the property is set to "yes" then emit a brief message.
 643          */
 644         if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
 645             &data) == DDI_SUCCESS)) {
 646                 if (strcmp(data, "yes") == 0) {
 647                         MDI_DEBUG(1, (MDI_CONT, pdip,
 648                             "?multipath capabilities disabled via %s.conf.",
 649                             ddi_driver_name(pdip)));
 650                         ddi_prop_free(data);
 651                         return (MDI_FAILURE);
 652                 }
 653                 ddi_prop_free(data);
 654         }
 655 
 656         /*
 657          * Search for a matching vHCI
 658          */
 659         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
 660         if (vh == NULL) {
 661                 return (MDI_FAILURE);
 662         }
 663 
 664         ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
 665         mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
 666         ph->ph_dip = pdip;
 667         ph->ph_vhci = vh;
 668         ph->ph_next = NULL;
 669         ph->ph_unstable = 0;
 670         ph->ph_vprivate = 0;
 671         cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
 672 
 673         MDI_PHCI_LOCK(ph);
 674         MDI_PHCI_SET_POWER_UP(ph);
 675         MDI_PHCI_UNLOCK(ph);
 676         DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
 677         DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
 678 
 679         vhcache_phci_add(vh->vh_config, ph);
 680 
 681         MDI_VHCI_PHCI_LOCK(vh);
 682         if (vh->vh_phci_head == NULL) {
 683                 vh->vh_phci_head = ph;
 684         }
 685         if (vh->vh_phci_tail) {
 686                 vh->vh_phci_tail->ph_next = ph;
 687         }
 688         vh->vh_phci_tail = ph;
 689         vh->vh_phci_count++;
 690         MDI_VHCI_PHCI_UNLOCK(vh);
 691 
 692         i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
 693         return (MDI_SUCCESS);
 694 }
 695 
 696 /*
 697  * mdi_phci_unregister():
 698  *              Unregister a pHCI module from mpxio framework
 699  *              mdi_phci_unregister() is called by the pHCI drivers from their
 700  *              detach(9E) handler to unregister their instances from the
 701  *              framework.
 702  * Return Values:
 703  *              MDI_SUCCESS
 704  *              MDI_FAILURE
 705  */
 706 /*ARGSUSED*/
 707 int
 708 mdi_phci_unregister(dev_info_t *pdip, int flags)
 709 {
 710         mdi_vhci_t              *vh;
 711         mdi_phci_t              *ph;
 712         mdi_phci_t              *tmp;
 713         mdi_phci_t              *prev = NULL;
 714         mdi_pathinfo_t          *pip;
 715 
 716         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 717 
 718         ph = i_devi_get_phci(pdip);
 719         if (ph == NULL) {
 720                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
 721                 return (MDI_FAILURE);
 722         }
 723 
 724         vh = ph->ph_vhci;
 725         ASSERT(vh != NULL);
 726         if (vh == NULL) {
 727                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
 728                 return (MDI_FAILURE);
 729         }
 730 
 731         MDI_VHCI_PHCI_LOCK(vh);
 732         tmp = vh->vh_phci_head;
 733         while (tmp) {
 734                 if (tmp == ph) {
 735                         break;
 736                 }
 737                 prev = tmp;
 738                 tmp = tmp->ph_next;
 739         }
 740 
 741         if (ph == vh->vh_phci_head) {
 742                 vh->vh_phci_head = ph->ph_next;
 743         } else {
 744                 prev->ph_next = ph->ph_next;
 745         }
 746 
 747         if (ph == vh->vh_phci_tail) {
 748                 vh->vh_phci_tail = prev;
 749         }
 750 
 751         vh->vh_phci_count--;
 752         MDI_VHCI_PHCI_UNLOCK(vh);
 753 
 754         /* Walk remaining pathinfo nodes and disassociate them from pHCI */
 755         MDI_PHCI_LOCK(ph);
 756         for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
 757             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
 758                 MDI_PI(pip)->pi_phci = NULL;
 759         MDI_PHCI_UNLOCK(ph);
 760 
 761         i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
 762             ESC_DDI_INITIATOR_UNREGISTER);
 763         vhcache_phci_remove(vh->vh_config, ph);
 764         cv_destroy(&ph->ph_unstable_cv);
 765         mutex_destroy(&ph->ph_mutex);
 766         kmem_free(ph, sizeof (mdi_phci_t));
 767         DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
 768         DEVI(pdip)->devi_mdi_xhci = NULL;
 769         return (MDI_SUCCESS);
 770 }
 771 
 772 /*
 773  * i_devi_get_phci():
 774  *              Utility function to return the phci extensions.
 775  */
 776 static mdi_phci_t *
 777 i_devi_get_phci(dev_info_t *pdip)
 778 {
 779         mdi_phci_t      *ph = NULL;
 780 
 781         if (MDI_PHCI(pdip)) {
 782                 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
 783         }
 784         return (ph);
 785 }
 786 
 787 /*
 788  * Single thread mdi entry into devinfo node for modifying its children.
 789  * If necessary we perform an ndi_devi_enter of the vHCI before doing
 790  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 791  * for the vHCI and one for the pHCI.
 792  */
 793 void
 794 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
 795 {
 796         dev_info_t      *vdip;
 797         int             vcircular, pcircular;
 798 
 799         /* Verify calling context */
 800         ASSERT(MDI_PHCI(phci_dip));
 801         vdip = mdi_devi_get_vdip(phci_dip);
 802         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 803 
 804         /*
 805          * If pHCI is detaching then the framework has already entered the
 806          * vHCI on a threads that went down the code path leading to
 807          * detach_node().  This framework enter of the vHCI during pHCI
 808          * detach is done to avoid deadlock with vHCI power management
 809          * operations which enter the vHCI and the enter down the path
 810          * to the pHCI. If pHCI is detaching then we piggyback this calls
 811          * enter of the vHCI on frameworks vHCI enter that has already
 812          * occurred - this is OK because we know that the framework thread
 813          * doing detach is waiting for our completion.
 814          *
 815          * We should DEVI_IS_DETACHING under an enter of the parent to avoid
 816          * race with detach - but we can't do that because the framework has
 817          * already entered the parent, so we have some complexity instead.
 818          */
 819         for (;;) {
 820                 if (ndi_devi_tryenter(vdip, &vcircular)) {
 821                         ASSERT(vcircular != -1);
 822                         if (DEVI_IS_DETACHING(phci_dip)) {
 823                                 ndi_devi_exit(vdip, vcircular);
 824                                 vcircular = -1;
 825                         }
 826                         break;
 827                 } else if (DEVI_IS_DETACHING(phci_dip)) {
 828                         vcircular = -1;
 829                         break;
 830                 } else if (servicing_interrupt()) {
 831                         /*
 832                          * Don't delay an interrupt (and ensure adaptive
 833                          * mutex inversion support).
 834                          */
 835                         ndi_devi_enter(vdip, &vcircular);
 836                         break;
 837                 } else {
 838                         delay_random(mdi_delay);
 839                 }
 840         }
 841 
 842         ndi_devi_enter(phci_dip, &pcircular);
 843         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 844 }
 845 
 846 /*
 847  * Attempt to mdi_devi_enter.
 848  */
 849 int
 850 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
 851 {
 852         dev_info_t      *vdip;
 853         int             vcircular, pcircular;
 854 
 855         /* Verify calling context */
 856         ASSERT(MDI_PHCI(phci_dip));
 857         vdip = mdi_devi_get_vdip(phci_dip);
 858         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 859 
 860         if (ndi_devi_tryenter(vdip, &vcircular)) {
 861                 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
 862                         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 863                         return (1);     /* locked */
 864                 }
 865                 ndi_devi_exit(vdip, vcircular);
 866         }
 867         return (0);                     /* busy */
 868 }
 869 
 870 /*
 871  * Release mdi_devi_enter or successful mdi_devi_tryenter.
 872  */
 873 void
 874 mdi_devi_exit(dev_info_t *phci_dip, int circular)
 875 {
 876         dev_info_t      *vdip;
 877         int             vcircular, pcircular;
 878 
 879         /* Verify calling context */
 880         ASSERT(MDI_PHCI(phci_dip));
 881         vdip = mdi_devi_get_vdip(phci_dip);
 882         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 883 
 884         /* extract two circular recursion values from single int */
 885         pcircular = (short)(circular & 0xFFFF);
 886         vcircular = (short)((circular >> 16) & 0xFFFF);
 887 
 888         ndi_devi_exit(phci_dip, pcircular);
 889         if (vcircular != -1)
 890                 ndi_devi_exit(vdip, vcircular);
 891 }
 892 
 893 /*
 894  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 895  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 896  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 897  * with vHCI power management code during path online/offline.  Each
 898  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 899  * occur within the scope of an active mdi_devi_enter that establishes the
 900  * circular value.
 901  */
 902 void
 903 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
 904 {
 905         int             pcircular;
 906 
 907         /* Verify calling context */
 908         ASSERT(MDI_PHCI(phci_dip));
 909 
 910         /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
 911         ndi_hold_devi(phci_dip);
 912 
 913         pcircular = (short)(circular & 0xFFFF);
 914         ndi_devi_exit(phci_dip, pcircular);
 915 }
 916 
 917 void
 918 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
 919 {
 920         int             pcircular;
 921 
 922         /* Verify calling context */
 923         ASSERT(MDI_PHCI(phci_dip));
 924 
 925         ndi_devi_enter(phci_dip, &pcircular);
 926 
 927         /* Drop hold from mdi_devi_exit_phci. */
 928         ndi_rele_devi(phci_dip);
 929 
 930         /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
 931         ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
 932 }
 933 
 934 /*
 935  * mdi_devi_get_vdip():
 936  *              given a pHCI dip return vHCI dip
 937  */
 938 dev_info_t *
 939 mdi_devi_get_vdip(dev_info_t *pdip)
 940 {
 941         mdi_phci_t      *ph;
 942 
 943         ph = i_devi_get_phci(pdip);
 944         if (ph && ph->ph_vhci)
 945                 return (ph->ph_vhci->vh_dip);
 946         return (NULL);
 947 }
 948 
 949 /*
 950  * mdi_devi_pdip_entered():
 951  *              Return 1 if we are vHCI and have done an ndi_devi_enter
 952  *              of a pHCI
 953  */
 954 int
 955 mdi_devi_pdip_entered(dev_info_t *vdip)
 956 {
 957         mdi_vhci_t      *vh;
 958         mdi_phci_t      *ph;
 959 
 960         vh = i_devi_get_vhci(vdip);
 961         if (vh == NULL)
 962                 return (0);
 963 
 964         MDI_VHCI_PHCI_LOCK(vh);
 965         ph = vh->vh_phci_head;
 966         while (ph) {
 967                 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
 968                         MDI_VHCI_PHCI_UNLOCK(vh);
 969                         return (1);
 970                 }
 971                 ph = ph->ph_next;
 972         }
 973         MDI_VHCI_PHCI_UNLOCK(vh);
 974         return (0);
 975 }
 976 
 977 /*
 978  * mdi_phci_path2devinfo():
 979  *              Utility function to search for a valid phci device given
 980  *              the devfs pathname.
 981  */
 982 dev_info_t *
 983 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
 984 {
 985         char            *temp_pathname;
 986         mdi_vhci_t      *vh;
 987         mdi_phci_t      *ph;
 988         dev_info_t      *pdip = NULL;
 989 
 990         vh = i_devi_get_vhci(vdip);
 991         ASSERT(vh != NULL);
 992 
 993         if (vh == NULL) {
 994                 /*
 995                  * Invalid vHCI component, return failure
 996                  */
 997                 return (NULL);
 998         }
 999 
1000         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1001         MDI_VHCI_PHCI_LOCK(vh);
1002         ph = vh->vh_phci_head;
1003         while (ph != NULL) {
1004                 pdip = ph->ph_dip;
1005                 ASSERT(pdip != NULL);
1006                 *temp_pathname = '\0';
1007                 (void) ddi_pathname(pdip, temp_pathname);
1008                 if (strcmp(temp_pathname, pathname) == 0) {
1009                         break;
1010                 }
1011                 ph = ph->ph_next;
1012         }
1013         if (ph == NULL) {
1014                 pdip = NULL;
1015         }
1016         MDI_VHCI_PHCI_UNLOCK(vh);
1017         kmem_free(temp_pathname, MAXPATHLEN);
1018         return (pdip);
1019 }
1020 
1021 /*
1022  * mdi_phci_get_path_count():
1023  *              get number of path information nodes associated with a given
1024  *              pHCI device.
1025  */
1026 int
1027 mdi_phci_get_path_count(dev_info_t *pdip)
1028 {
1029         mdi_phci_t      *ph;
1030         int             count = 0;
1031 
1032         ph = i_devi_get_phci(pdip);
1033         if (ph != NULL) {
1034                 count = ph->ph_path_count;
1035         }
1036         return (count);
1037 }
1038 
1039 /*
1040  * i_mdi_phci_lock():
1041  *              Lock a pHCI device
1042  * Return Values:
1043  *              None
1044  * Note:
1045  *              The default locking order is:
1046  *              _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1047  *              But there are number of situations where locks need to be
1048  *              grabbed in reverse order.  This routine implements try and lock
1049  *              mechanism depending on the requested parameter option.
1050  */
1051 static void
1052 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1053 {
1054         if (pip) {
1055                 /* Reverse locking is requested. */
1056                 while (MDI_PHCI_TRYLOCK(ph) == 0) {
1057                         if (servicing_interrupt()) {
1058                                 MDI_PI_HOLD(pip);
1059                                 MDI_PI_UNLOCK(pip);
1060                                 MDI_PHCI_LOCK(ph);
1061                                 MDI_PI_LOCK(pip);
1062                                 MDI_PI_RELE(pip);
1063                                 break;
1064                         } else {
1065                                 /*
1066                                  * tryenter failed. Try to grab again
1067                                  * after a small delay
1068                                  */
1069                                 MDI_PI_HOLD(pip);
1070                                 MDI_PI_UNLOCK(pip);
1071                                 delay_random(mdi_delay);
1072                                 MDI_PI_LOCK(pip);
1073                                 MDI_PI_RELE(pip);
1074                         }
1075                 }
1076         } else {
1077                 MDI_PHCI_LOCK(ph);
1078         }
1079 }
1080 
1081 /*
1082  * i_mdi_phci_unlock():
1083  *              Unlock the pHCI component
1084  */
1085 static void
1086 i_mdi_phci_unlock(mdi_phci_t *ph)
1087 {
1088         MDI_PHCI_UNLOCK(ph);
1089 }
1090 
1091 /*
1092  * i_mdi_devinfo_create():
1093  *              create client device's devinfo node
1094  * Return Values:
1095  *              dev_info
1096  *              NULL
1097  * Notes:
1098  */
1099 static dev_info_t *
1100 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1101         char **compatible, int ncompatible)
1102 {
1103         dev_info_t *cdip = NULL;
1104 
1105         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1106 
1107         /* Verify for duplicate entry */
1108         cdip = i_mdi_devinfo_find(vh, name, guid);
1109         ASSERT(cdip == NULL);
1110         if (cdip) {
1111                 cmn_err(CE_WARN,
1112                     "i_mdi_devinfo_create: client %s@%s already exists",
1113                         name ? name : "", guid ? guid : "");
1114         }
1115 
1116         ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1117         if (cdip == NULL)
1118                 goto fail;
1119 
1120         /*
1121          * Create component type and Global unique identifier
1122          * properties
1123          */
1124         if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1125             MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1126                 goto fail;
1127         }
1128 
1129         /* Decorate the node with compatible property */
1130         if (compatible &&
1131             (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1132             "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1133                 goto fail;
1134         }
1135 
1136         return (cdip);
1137 
1138 fail:
1139         if (cdip) {
1140                 (void) ndi_prop_remove_all(cdip);
1141                 (void) ndi_devi_free(cdip);
1142         }
1143         return (NULL);
1144 }
1145 
1146 /*
1147  * i_mdi_devinfo_find():
1148  *              Find a matching devinfo node for given client node name
1149  *              and its guid.
1150  * Return Values:
1151  *              Handle to a dev_info node or NULL
1152  */
1153 static dev_info_t *
1154 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1155 {
1156         char                    *data;
1157         dev_info_t              *cdip = NULL;
1158         dev_info_t              *ndip = NULL;
1159         int                     circular;
1160 
1161         ndi_devi_enter(vh->vh_dip, &circular);
1162         ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1163         while ((cdip = ndip) != NULL) {
1164                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1165 
1166                 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1167                         continue;
1168                 }
1169 
1170                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1171                     DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1172                     &data) != DDI_PROP_SUCCESS) {
1173                         continue;
1174                 }
1175 
1176                 if (strcmp(data, guid) != 0) {
1177                         ddi_prop_free(data);
1178                         continue;
1179                 }
1180                 ddi_prop_free(data);
1181                 break;
1182         }
1183         ndi_devi_exit(vh->vh_dip, circular);
1184         return (cdip);
1185 }
1186 
1187 /*
1188  * i_mdi_devinfo_remove():
1189  *              Remove a client device node
1190  */
1191 static int
1192 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1193 {
1194         int     rv = MDI_SUCCESS;
1195 
1196         if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1197             (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1198                 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1199                 if (rv != NDI_SUCCESS) {
1200                         MDI_DEBUG(1, (MDI_NOTE, cdip,
1201                             "!failed: cdip %p", (void *)cdip));
1202                 }
1203                 /*
1204                  * Convert to MDI error code
1205                  */
1206                 switch (rv) {
1207                 case NDI_SUCCESS:
1208                         rv = MDI_SUCCESS;
1209                         break;
1210                 case NDI_BUSY:
1211                         rv = MDI_BUSY;
1212                         break;
1213                 default:
1214                         rv = MDI_FAILURE;
1215                         break;
1216                 }
1217         }
1218         return (rv);
1219 }
1220 
1221 /*
1222  * i_devi_get_client()
1223  *              Utility function to get mpxio component extensions
1224  */
1225 static mdi_client_t *
1226 i_devi_get_client(dev_info_t *cdip)
1227 {
1228         mdi_client_t    *ct = NULL;
1229 
1230         if (MDI_CLIENT(cdip)) {
1231                 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1232         }
1233         return (ct);
1234 }
1235 
1236 /*
1237  * i_mdi_is_child_present():
1238  *              Search for the presence of client device dev_info node
1239  */
1240 static int
1241 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1242 {
1243         int             rv = MDI_FAILURE;
1244         struct dev_info *dip;
1245         int             circular;
1246 
1247         ndi_devi_enter(vdip, &circular);
1248         dip = DEVI(vdip)->devi_child;
1249         while (dip) {
1250                 if (dip == DEVI(cdip)) {
1251                         rv = MDI_SUCCESS;
1252                         break;
1253                 }
1254                 dip = dip->devi_sibling;
1255         }
1256         ndi_devi_exit(vdip, circular);
1257         return (rv);
1258 }
1259 
1260 
1261 /*
1262  * i_mdi_client_lock():
1263  *              Grab client component lock
1264  * Return Values:
1265  *              None
1266  * Note:
1267  *              The default locking order is:
1268  *              _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1269  *              But there are number of situations where locks need to be
1270  *              grabbed in reverse order.  This routine implements try and lock
1271  *              mechanism depending on the requested parameter option.
1272  */
1273 static void
1274 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1275 {
1276         if (pip) {
1277                 /*
1278                  * Reverse locking is requested.
1279                  */
1280                 while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1281                         if (servicing_interrupt()) {
1282                                 MDI_PI_HOLD(pip);
1283                                 MDI_PI_UNLOCK(pip);
1284                                 MDI_CLIENT_LOCK(ct);
1285                                 MDI_PI_LOCK(pip);
1286                                 MDI_PI_RELE(pip);
1287                                 break;
1288                         } else {
1289                                 /*
1290                                  * tryenter failed. Try to grab again
1291                                  * after a small delay
1292                                  */
1293                                 MDI_PI_HOLD(pip);
1294                                 MDI_PI_UNLOCK(pip);
1295                                 delay_random(mdi_delay);
1296                                 MDI_PI_LOCK(pip);
1297                                 MDI_PI_RELE(pip);
1298                         }
1299                 }
1300         } else {
1301                 MDI_CLIENT_LOCK(ct);
1302         }
1303 }
1304 
1305 /*
1306  * i_mdi_client_unlock():
1307  *              Unlock a client component
1308  */
1309 static void
1310 i_mdi_client_unlock(mdi_client_t *ct)
1311 {
1312         MDI_CLIENT_UNLOCK(ct);
1313 }
1314 
1315 /*
1316  * i_mdi_client_alloc():
1317  *              Allocate and initialize a client structure.  Caller should
1318  *              hold the vhci client lock.
1319  * Return Values:
1320  *              Handle to a client component
1321  */
1322 /*ARGSUSED*/
1323 static mdi_client_t *
1324 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1325 {
1326         mdi_client_t    *ct;
1327 
1328         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1329 
1330         /*
1331          * Allocate and initialize a component structure.
1332          */
1333         ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1334         mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1335         ct->ct_hnext = NULL;
1336         ct->ct_hprev = NULL;
1337         ct->ct_dip = NULL;
1338         ct->ct_vhci = vh;
1339         ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1340         (void) strcpy(ct->ct_drvname, name);
1341         ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1342         (void) strcpy(ct->ct_guid, lguid);
1343         ct->ct_cprivate = NULL;
1344         ct->ct_vprivate = NULL;
1345         ct->ct_flags = 0;
1346         ct->ct_state = MDI_CLIENT_STATE_FAILED;
1347         MDI_CLIENT_LOCK(ct);
1348         MDI_CLIENT_SET_OFFLINE(ct);
1349         MDI_CLIENT_SET_DETACH(ct);
1350         MDI_CLIENT_SET_POWER_UP(ct);
1351         MDI_CLIENT_UNLOCK(ct);
1352         ct->ct_failover_flags = 0;
1353         ct->ct_failover_status = 0;
1354         cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1355         ct->ct_unstable = 0;
1356         cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1357         cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1358         ct->ct_lb = vh->vh_lb;
1359         ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1360         ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1361         ct->ct_path_count = 0;
1362         ct->ct_path_head = NULL;
1363         ct->ct_path_tail = NULL;
1364         ct->ct_path_last = NULL;
1365 
1366         /*
1367          * Add this client component to our client hash queue
1368          */
1369         i_mdi_client_enlist_table(vh, ct);
1370         return (ct);
1371 }
1372 
1373 /*
1374  * i_mdi_client_enlist_table():
1375  *              Attach the client device to the client hash table. Caller
1376  *              should hold the vhci client lock.
1377  */
1378 static void
1379 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1380 {
1381         int                     index;
1382         struct client_hash      *head;
1383 
1384         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1385 
1386         index = i_mdi_get_hash_key(ct->ct_guid);
1387         head = &vh->vh_client_table[index];
1388         ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1389         head->ct_hash_head = ct;
1390         head->ct_hash_count++;
1391         vh->vh_client_count++;
1392 }
1393 
1394 /*
1395  * i_mdi_client_delist_table():
1396  *              Attach the client device to the client hash table.
1397  *              Caller should hold the vhci client lock.
1398  */
1399 static void
1400 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1401 {
1402         int                     index;
1403         char                    *guid;
1404         struct client_hash      *head;
1405         mdi_client_t            *next;
1406         mdi_client_t            *last;
1407 
1408         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1409 
1410         guid = ct->ct_guid;
1411         index = i_mdi_get_hash_key(guid);
1412         head = &vh->vh_client_table[index];
1413 
1414         last = NULL;
1415         next = (mdi_client_t *)head->ct_hash_head;
1416         while (next != NULL) {
1417                 if (next == ct) {
1418                         break;
1419                 }
1420                 last = next;
1421                 next = next->ct_hnext;
1422         }
1423 
1424         if (next) {
1425                 head->ct_hash_count--;
1426                 if (last == NULL) {
1427                         head->ct_hash_head = ct->ct_hnext;
1428                 } else {
1429                         last->ct_hnext = ct->ct_hnext;
1430                 }
1431                 ct->ct_hnext = NULL;
1432                 vh->vh_client_count--;
1433         }
1434 }
1435 
1436 
1437 /*
1438  * i_mdi_client_free():
1439  *              Free a client component
1440  */
1441 static int
1442 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1443 {
1444         int             rv = MDI_SUCCESS;
1445         int             flags = ct->ct_flags;
1446         dev_info_t      *cdip;
1447         dev_info_t      *vdip;
1448 
1449         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1450 
1451         vdip = vh->vh_dip;
1452         cdip = ct->ct_dip;
1453 
1454         (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1455         DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1456         DEVI(cdip)->devi_mdi_client = NULL;
1457 
1458         /*
1459          * Clear out back ref. to dev_info_t node
1460          */
1461         ct->ct_dip = NULL;
1462 
1463         /*
1464          * Remove this client from our hash queue
1465          */
1466         i_mdi_client_delist_table(vh, ct);
1467 
1468         /*
1469          * Uninitialize and free the component
1470          */
1471         kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1472         kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1473         kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1474         cv_destroy(&ct->ct_failover_cv);
1475         cv_destroy(&ct->ct_unstable_cv);
1476         cv_destroy(&ct->ct_powerchange_cv);
1477         mutex_destroy(&ct->ct_mutex);
1478         kmem_free(ct, sizeof (*ct));
1479 
1480         if (cdip != NULL) {
1481                 MDI_VHCI_CLIENT_UNLOCK(vh);
1482                 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1483                 MDI_VHCI_CLIENT_LOCK(vh);
1484         }
1485         return (rv);
1486 }
1487 
1488 /*
1489  * i_mdi_client_find():
1490  *              Find the client structure corresponding to a given guid
1491  *              Caller should hold the vhci client lock.
1492  */
1493 static mdi_client_t *
1494 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1495 {
1496         int                     index;
1497         struct client_hash      *head;
1498         mdi_client_t            *ct;
1499 
1500         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1501 
1502         index = i_mdi_get_hash_key(guid);
1503         head = &vh->vh_client_table[index];
1504 
1505         ct = head->ct_hash_head;
1506         while (ct != NULL) {
1507                 if (strcmp(ct->ct_guid, guid) == 0 &&
1508                     (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1509                         break;
1510                 }
1511                 ct = ct->ct_hnext;
1512         }
1513         return (ct);
1514 }
1515 
1516 /*
1517  * i_mdi_client_update_state():
1518  *              Compute and update client device state
1519  * Notes:
1520  *              A client device can be in any of three possible states:
1521  *
1522  *              MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1523  *              one online/standby paths. Can tolerate failures.
1524  *              MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1525  *              no alternate paths available as standby. A failure on the online
1526  *              would result in loss of access to device data.
1527  *              MDI_CLIENT_STATE_FAILED - Client device in failed state with
1528  *              no paths available to access the device.
1529  */
1530 static void
1531 i_mdi_client_update_state(mdi_client_t *ct)
1532 {
1533         int state;
1534 
1535         ASSERT(MDI_CLIENT_LOCKED(ct));
1536         state = i_mdi_client_compute_state(ct, NULL);
1537         MDI_CLIENT_SET_STATE(ct, state);
1538 }
1539 
1540 /*
1541  * i_mdi_client_compute_state():
1542  *              Compute client device state
1543  *
1544  *              mdi_phci_t *    Pointer to pHCI structure which should
1545  *                              while computing the new value.  Used by
1546  *                              i_mdi_phci_offline() to find the new
1547  *                              client state after DR of a pHCI.
1548  */
1549 static int
1550 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1551 {
1552         int             state;
1553         int             online_count = 0;
1554         int             standby_count = 0;
1555         mdi_pathinfo_t  *pip, *next;
1556 
1557         ASSERT(MDI_CLIENT_LOCKED(ct));
1558         pip = ct->ct_path_head;
1559         while (pip != NULL) {
1560                 MDI_PI_LOCK(pip);
1561                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1562                 if (MDI_PI(pip)->pi_phci == ph) {
1563                         MDI_PI_UNLOCK(pip);
1564                         pip = next;
1565                         continue;
1566                 }
1567 
1568                 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1569                                 == MDI_PATHINFO_STATE_ONLINE)
1570                         online_count++;
1571                 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1572                                 == MDI_PATHINFO_STATE_STANDBY)
1573                         standby_count++;
1574                 MDI_PI_UNLOCK(pip);
1575                 pip = next;
1576         }
1577 
1578         if (online_count == 0) {
1579                 if (standby_count == 0) {
1580                         state = MDI_CLIENT_STATE_FAILED;
1581                         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1582                             "client state failed: ct = %p", (void *)ct));
1583                 } else if (standby_count == 1) {
1584                         state = MDI_CLIENT_STATE_DEGRADED;
1585                 } else {
1586                         state = MDI_CLIENT_STATE_OPTIMAL;
1587                 }
1588         } else if (online_count == 1) {
1589                 if (standby_count == 0) {
1590                         state = MDI_CLIENT_STATE_DEGRADED;
1591                 } else {
1592                         state = MDI_CLIENT_STATE_OPTIMAL;
1593                 }
1594         } else {
1595                 state = MDI_CLIENT_STATE_OPTIMAL;
1596         }
1597         return (state);
1598 }
1599 
1600 /*
1601  * i_mdi_client2devinfo():
1602  *              Utility function
1603  */
1604 dev_info_t *
1605 i_mdi_client2devinfo(mdi_client_t *ct)
1606 {
1607         return (ct->ct_dip);
1608 }
1609 
1610 /*
1611  * mdi_client_path2_devinfo():
1612  *              Given the parent devinfo and child devfs pathname, search for
1613  *              a valid devfs node handle.
1614  */
1615 dev_info_t *
1616 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1617 {
1618         dev_info_t      *cdip = NULL;
1619         dev_info_t      *ndip = NULL;
1620         char            *temp_pathname;
1621         int             circular;
1622 
1623         /*
1624          * Allocate temp buffer
1625          */
1626         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1627 
1628         /*
1629          * Lock parent against changes
1630          */
1631         ndi_devi_enter(vdip, &circular);
1632         ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1633         while ((cdip = ndip) != NULL) {
1634                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1635 
1636                 *temp_pathname = '\0';
1637                 (void) ddi_pathname(cdip, temp_pathname);
1638                 if (strcmp(temp_pathname, pathname) == 0) {
1639                         break;
1640                 }
1641         }
1642         /*
1643          * Release devinfo lock
1644          */
1645         ndi_devi_exit(vdip, circular);
1646 
1647         /*
1648          * Free the temp buffer
1649          */
1650         kmem_free(temp_pathname, MAXPATHLEN);
1651         return (cdip);
1652 }
1653 
1654 /*
1655  * mdi_client_get_path_count():
1656  *              Utility function to get number of path information nodes
1657  *              associated with a given client device.
1658  */
1659 int
1660 mdi_client_get_path_count(dev_info_t *cdip)
1661 {
1662         mdi_client_t    *ct;
1663         int             count = 0;
1664 
1665         ct = i_devi_get_client(cdip);
1666         if (ct != NULL) {
1667                 count = ct->ct_path_count;
1668         }
1669         return (count);
1670 }
1671 
1672 
1673 /*
1674  * i_mdi_get_hash_key():
1675  *              Create a hash using strings as keys
1676  *
1677  */
1678 static int
1679 i_mdi_get_hash_key(char *str)
1680 {
1681         uint32_t        g, hash = 0;
1682         char            *p;
1683 
1684         for (p = str; *p != '\0'; p++) {
1685                 g = *p;
1686                 hash += g;
1687         }
1688         return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1689 }
1690 
1691 /*
1692  * mdi_get_lb_policy():
1693  *              Get current load balancing policy for a given client device
1694  */
1695 client_lb_t
1696 mdi_get_lb_policy(dev_info_t *cdip)
1697 {
1698         client_lb_t     lb = LOAD_BALANCE_NONE;
1699         mdi_client_t    *ct;
1700 
1701         ct = i_devi_get_client(cdip);
1702         if (ct != NULL) {
1703                 lb = ct->ct_lb;
1704         }
1705         return (lb);
1706 }
1707 
1708 /*
1709  * mdi_set_lb_region_size():
1710  *              Set current region size for the load-balance
1711  */
1712 int
1713 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1714 {
1715         mdi_client_t    *ct;
1716         int             rv = MDI_FAILURE;
1717 
1718         ct = i_devi_get_client(cdip);
1719         if (ct != NULL && ct->ct_lb_args != NULL) {
1720                 ct->ct_lb_args->region_size = region_size;
1721                 rv = MDI_SUCCESS;
1722         }
1723         return (rv);
1724 }
1725 
1726 /*
1727  * mdi_Set_lb_policy():
1728  *              Set current load balancing policy for a given client device
1729  */
1730 int
1731 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1732 {
1733         mdi_client_t    *ct;
1734         int             rv = MDI_FAILURE;
1735 
1736         ct = i_devi_get_client(cdip);
1737         if (ct != NULL) {
1738                 ct->ct_lb = lb;
1739                 rv = MDI_SUCCESS;
1740         }
1741         return (rv);
1742 }
1743 
1744 /*
1745  * mdi_failover():
1746  *              failover function called by the vHCI drivers to initiate
1747  *              a failover operation.  This is typically due to non-availability
1748  *              of online paths to route I/O requests.  Failover can be
1749  *              triggered through user application also.
1750  *
1751  *              The vHCI driver calls mdi_failover() to initiate a failover
1752  *              operation. mdi_failover() calls back into the vHCI driver's
1753  *              vo_failover() entry point to perform the actual failover
1754  *              operation.  The reason for requiring the vHCI driver to
1755  *              initiate failover by calling mdi_failover(), instead of directly
1756  *              executing vo_failover() itself, is to ensure that the mdi
1757  *              framework can keep track of the client state properly.
1758  *              Additionally, mdi_failover() provides as a convenience the
1759  *              option of performing the failover operation synchronously or
1760  *              asynchronously
1761  *
1762  *              Upon successful completion of the failover operation, the
1763  *              paths that were previously ONLINE will be in the STANDBY state,
1764  *              and the newly activated paths will be in the ONLINE state.
1765  *
1766  *              The flags modifier determines whether the activation is done
1767  *              synchronously: MDI_FAILOVER_SYNC
1768  * Return Values:
1769  *              MDI_SUCCESS
1770  *              MDI_FAILURE
1771  *              MDI_BUSY
1772  */
1773 /*ARGSUSED*/
1774 int
1775 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1776 {
1777         int                     rv;
1778         mdi_client_t            *ct;
1779 
1780         ct = i_devi_get_client(cdip);
1781         ASSERT(ct != NULL);
1782         if (ct == NULL) {
1783                 /* cdip is not a valid client device. Nothing more to do. */
1784                 return (MDI_FAILURE);
1785         }
1786 
1787         MDI_CLIENT_LOCK(ct);
1788 
1789         if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1790                 /* A path to the client is being freed */
1791                 MDI_CLIENT_UNLOCK(ct);
1792                 return (MDI_BUSY);
1793         }
1794 
1795 
1796         if (MDI_CLIENT_IS_FAILED(ct)) {
1797                 /*
1798                  * Client is in failed state. Nothing more to do.
1799                  */
1800                 MDI_CLIENT_UNLOCK(ct);
1801                 return (MDI_FAILURE);
1802         }
1803 
1804         if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1805                 /*
1806                  * Failover is already in progress; return BUSY
1807                  */
1808                 MDI_CLIENT_UNLOCK(ct);
1809                 return (MDI_BUSY);
1810         }
1811         /*
1812          * Make sure that mdi_pathinfo node state changes are processed.
1813          * We do not allow failovers to progress while client path state
1814          * changes are in progress
1815          */
1816         if (ct->ct_unstable) {
1817                 if (flags == MDI_FAILOVER_ASYNC) {
1818                         MDI_CLIENT_UNLOCK(ct);
1819                         return (MDI_BUSY);
1820                 } else {
1821                         while (ct->ct_unstable)
1822                                 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1823                 }
1824         }
1825 
1826         /*
1827          * Client device is in stable state. Before proceeding, perform sanity
1828          * checks again.
1829          */
1830         if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1831             (!i_ddi_devi_attached(cdip))) {
1832                 /*
1833                  * Client is in failed state. Nothing more to do.
1834                  */
1835                 MDI_CLIENT_UNLOCK(ct);
1836                 return (MDI_FAILURE);
1837         }
1838 
1839         /*
1840          * Set the client state as failover in progress.
1841          */
1842         MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1843         ct->ct_failover_flags = flags;
1844         MDI_CLIENT_UNLOCK(ct);
1845 
1846         if (flags == MDI_FAILOVER_ASYNC) {
1847                 /*
1848                  * Submit the initiate failover request via CPR safe
1849                  * taskq threads.
1850                  */
1851                 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1852                     ct, KM_SLEEP);
1853                 return (MDI_ACCEPT);
1854         } else {
1855                 /*
1856                  * Synchronous failover mode.  Typically invoked from the user
1857                  * land.
1858                  */
1859                 rv = i_mdi_failover(ct);
1860         }
1861         return (rv);
1862 }
1863 
1864 /*
1865  * i_mdi_failover():
1866  *              internal failover function. Invokes vHCI drivers failover
1867  *              callback function and process the failover status
1868  * Return Values:
1869  *              None
1870  *
1871  * Note: A client device in failover state can not be detached or freed.
1872  */
1873 static int
1874 i_mdi_failover(void *arg)
1875 {
1876         int             rv = MDI_SUCCESS;
1877         mdi_client_t    *ct = (mdi_client_t *)arg;
1878         mdi_vhci_t      *vh = ct->ct_vhci;
1879 
1880         ASSERT(!MDI_CLIENT_LOCKED(ct));
1881 
1882         if (vh->vh_ops->vo_failover != NULL) {
1883                 /*
1884                  * Call vHCI drivers callback routine
1885                  */
1886                 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1887                     ct->ct_failover_flags);
1888         }
1889 
1890         MDI_CLIENT_LOCK(ct);
1891         MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1892 
1893         /*
1894          * Save the failover return status
1895          */
1896         ct->ct_failover_status = rv;
1897 
1898         /*
1899          * As a result of failover, client status would have been changed.
1900          * Update the client state and wake up anyone waiting on this client
1901          * device.
1902          */
1903         i_mdi_client_update_state(ct);
1904 
1905         cv_broadcast(&ct->ct_failover_cv);
1906         MDI_CLIENT_UNLOCK(ct);
1907         return (rv);
1908 }
1909 
1910 /*
1911  * Load balancing is logical block.
1912  * IOs within the range described by region_size
1913  * would go on the same path. This would improve the
1914  * performance by cache-hit on some of the RAID devices.
1915  * Search only for online paths(At some point we
1916  * may want to balance across target ports).
1917  * If no paths are found then default to round-robin.
1918  */
1919 static int
1920 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1921 {
1922         int             path_index = -1;
1923         int             online_path_count = 0;
1924         int             online_nonpref_path_count = 0;
1925         int             region_size = ct->ct_lb_args->region_size;
1926         mdi_pathinfo_t  *pip;
1927         mdi_pathinfo_t  *next;
1928         int             preferred, path_cnt;
1929 
1930         pip = ct->ct_path_head;
1931         while (pip) {
1932                 MDI_PI_LOCK(pip);
1933                 if (MDI_PI(pip)->pi_state ==
1934                     MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1935                         online_path_count++;
1936                 } else if (MDI_PI(pip)->pi_state ==
1937                     MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1938                         online_nonpref_path_count++;
1939                 }
1940                 next = (mdi_pathinfo_t *)
1941                     MDI_PI(pip)->pi_client_link;
1942                 MDI_PI_UNLOCK(pip);
1943                 pip = next;
1944         }
1945         /* if found any online/preferred then use this type */
1946         if (online_path_count > 0) {
1947                 path_cnt = online_path_count;
1948                 preferred = 1;
1949         } else if (online_nonpref_path_count > 0) {
1950                 path_cnt = online_nonpref_path_count;
1951                 preferred = 0;
1952         } else {
1953                 path_cnt = 0;
1954         }
1955         if (path_cnt) {
1956                 path_index = (bp->b_blkno >> region_size) % path_cnt;
1957                 pip = ct->ct_path_head;
1958                 while (pip && path_index != -1) {
1959                         MDI_PI_LOCK(pip);
1960                         if (path_index == 0 &&
1961                             (MDI_PI(pip)->pi_state ==
1962                             MDI_PATHINFO_STATE_ONLINE) &&
1963                                 MDI_PI(pip)->pi_preferred == preferred) {
1964                                 MDI_PI_HOLD(pip);
1965                                 MDI_PI_UNLOCK(pip);
1966                                 *ret_pip = pip;
1967                                 return (MDI_SUCCESS);
1968                         }
1969                         path_index --;
1970                         next = (mdi_pathinfo_t *)
1971                             MDI_PI(pip)->pi_client_link;
1972                         MDI_PI_UNLOCK(pip);
1973                         pip = next;
1974                 }
1975                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1976                     "lba %llx: path %s %p",
1977                     bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1978         }
1979         return (MDI_FAILURE);
1980 }
1981 
1982 /*
1983  * mdi_select_path():
1984  *              select a path to access a client device.
1985  *
1986  *              mdi_select_path() function is called by the vHCI drivers to
1987  *              select a path to route the I/O request to.  The caller passes
1988  *              the block I/O data transfer structure ("buf") as one of the
1989  *              parameters.  The mpxio framework uses the buf structure
1990  *              contents to maintain per path statistics (total I/O size /
1991  *              count pending).  If more than one online paths are available to
1992  *              select, the framework automatically selects a suitable path
1993  *              for routing I/O request. If a failover operation is active for
1994  *              this client device the call shall be failed with MDI_BUSY error
1995  *              code.
1996  *
1997  *              By default this function returns a suitable path in online
1998  *              state based on the current load balancing policy.  Currently
1999  *              we support LOAD_BALANCE_NONE (Previously selected online path
2000  *              will continue to be used till the path is usable) and
2001  *              LOAD_BALANCE_RR (Online paths will be selected in a round
2002  *              robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
2003  *              based on the logical block).  The load balancing policy is
2004  *              set through the vHCI driver's configuration file (driver.conf).
2005  *
2006  *              vHCI drivers may override this default behavior by specifying
2007  *              appropriate flags.  The meaning of the third argument depends
2008  *              on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2009  *              then the argument is the "path instance" of the path to select.
2010  *              If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2011  *              "start_pip". A non NULL "start_pip" is the starting point to
2012  *              walk and find the next appropriate path.  The following values
2013  *              are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2014  *              ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2015  *              STANDBY path).
2016  *
2017  *              The non-standard behavior is used by the scsi_vhci driver,
2018  *              whenever it has to use a STANDBY/FAULTED path.  Eg. during
2019  *              attach of client devices (to avoid an unnecessary failover
2020  *              when the STANDBY path comes up first), during failover
2021  *              (to activate a STANDBY path as ONLINE).
2022  *
2023  *              The selected path is returned in a mdi_hold_path() state
2024  *              (pi_ref_cnt). Caller should release the hold by calling
2025  *              mdi_rele_path().
2026  *
2027  * Return Values:
2028  *              MDI_SUCCESS     - Completed successfully
2029  *              MDI_BUSY        - Client device is busy failing over
2030  *              MDI_NOPATH      - Client device is online, but no valid path are
2031  *                                available to access this client device
2032  *              MDI_FAILURE     - Invalid client device or state
2033  *              MDI_DEVI_ONLINING
2034  *                              - Client device (struct dev_info state) is in
2035  *                                onlining state.
2036  */
2037 
2038 /*ARGSUSED*/
2039 int
2040 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2041     void *arg, mdi_pathinfo_t **ret_pip)
2042 {
2043         mdi_client_t    *ct;
2044         mdi_pathinfo_t  *pip;
2045         mdi_pathinfo_t  *next;
2046         mdi_pathinfo_t  *head;
2047         mdi_pathinfo_t  *start;
2048         client_lb_t     lbp;    /* load balancing policy */
2049         int             sb = 1; /* standard behavior */
2050         int             preferred = 1;  /* preferred path */
2051         int             cond, cont = 1;
2052         int             retry = 0;
2053         mdi_pathinfo_t  *start_pip;     /* request starting pathinfo */
2054         int             path_instance;  /* request specific path instance */
2055 
2056         /* determine type of arg based on flags */
2057         if (flags & MDI_SELECT_PATH_INSTANCE) {
2058                 path_instance = (int)(intptr_t)arg;
2059                 start_pip = NULL;
2060         } else {
2061                 path_instance = 0;
2062                 start_pip = (mdi_pathinfo_t *)arg;
2063         }
2064 
2065         if (flags != 0) {
2066                 /*
2067                  * disable default behavior
2068                  */
2069                 sb = 0;
2070         }
2071 
2072         *ret_pip = NULL;
2073         ct = i_devi_get_client(cdip);
2074         if (ct == NULL) {
2075                 /* mdi extensions are NULL, Nothing more to do */
2076                 return (MDI_FAILURE);
2077         }
2078 
2079         MDI_CLIENT_LOCK(ct);
2080 
2081         if (sb) {
2082                 if (MDI_CLIENT_IS_FAILED(ct)) {
2083                         /*
2084                          * Client is not ready to accept any I/O requests.
2085                          * Fail this request.
2086                          */
2087                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2088                             "client state offline ct = %p", (void *)ct));
2089                         MDI_CLIENT_UNLOCK(ct);
2090                         return (MDI_FAILURE);
2091                 }
2092 
2093                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2094                         /*
2095                          * Check for Failover is in progress. If so tell the
2096                          * caller that this device is busy.
2097                          */
2098                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2099                             "client failover in progress ct = %p",
2100                             (void *)ct));
2101                         MDI_CLIENT_UNLOCK(ct);
2102                         return (MDI_BUSY);
2103                 }
2104 
2105                 /*
2106                  * Check to see whether the client device is attached.
2107                  * If not so, let the vHCI driver manually select a path
2108                  * (standby) and let the probe/attach process to continue.
2109                  */
2110                 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2111                         MDI_DEBUG(4, (MDI_NOTE, cdip,
2112                             "devi is onlining ct = %p", (void *)ct));
2113                         MDI_CLIENT_UNLOCK(ct);
2114                         return (MDI_DEVI_ONLINING);
2115                 }
2116         }
2117 
2118         /*
2119          * Cache in the client list head.  If head of the list is NULL
2120          * return MDI_NOPATH
2121          */
2122         head = ct->ct_path_head;
2123         if (head == NULL) {
2124                 MDI_CLIENT_UNLOCK(ct);
2125                 return (MDI_NOPATH);
2126         }
2127 
2128         /* Caller is specifying a specific pathinfo path by path_instance */
2129         if (path_instance) {
2130                 /* search for pathinfo with correct path_instance */
2131                 for (pip = head;
2132                     pip && (mdi_pi_get_path_instance(pip) != path_instance);
2133                     pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2134                         ;
2135 
2136                 /* If path can't be selected then MDI_NOPATH is returned. */
2137                 if (pip == NULL) {
2138                         MDI_CLIENT_UNLOCK(ct);
2139                         return (MDI_NOPATH);
2140                 }
2141 
2142                 /*
2143                  * Verify state of path. When asked to select a specific
2144                  * path_instance, we select the requested path in any
2145                  * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2146                  * We don't however select paths where the pHCI has detached.
2147                  * NOTE: last pathinfo node of an opened client device may
2148                  * exist in an OFFLINE state after the pHCI associated with
2149                  * that path has detached (but pi_phci will be NULL if that
2150                  * has occurred).
2151                  */
2152                 MDI_PI_LOCK(pip);
2153                 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2154                     (MDI_PI(pip)->pi_phci == NULL)) {
2155                         MDI_PI_UNLOCK(pip);
2156                         MDI_CLIENT_UNLOCK(ct);
2157                         return (MDI_FAILURE);
2158                 }
2159 
2160                 /* Return MDI_BUSY if we have a transient condition */
2161                 if (MDI_PI_IS_TRANSIENT(pip)) {
2162                         MDI_PI_UNLOCK(pip);
2163                         MDI_CLIENT_UNLOCK(ct);
2164                         return (MDI_BUSY);
2165                 }
2166 
2167                 /*
2168                  * Return the path in hold state. Caller should release the
2169                  * lock by calling mdi_rele_path()
2170                  */
2171                 MDI_PI_HOLD(pip);
2172                 MDI_PI_UNLOCK(pip);
2173                 *ret_pip = pip;
2174                 MDI_CLIENT_UNLOCK(ct);
2175                 return (MDI_SUCCESS);
2176         }
2177 
2178         /*
2179          * for non default behavior, bypass current
2180          * load balancing policy and always use LOAD_BALANCE_RR
2181          * except that the start point will be adjusted based
2182          * on the provided start_pip
2183          */
2184         lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2185 
2186         switch (lbp) {
2187         case LOAD_BALANCE_NONE:
2188                 /*
2189                  * Load balancing is None  or Alternate path mode
2190                  * Start looking for a online mdi_pathinfo node starting from
2191                  * last known selected path
2192                  */
2193                 preferred = 1;
2194                 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2195                 if (pip == NULL) {
2196                         pip = head;
2197                 }
2198                 start = pip;
2199                 do {
2200                         MDI_PI_LOCK(pip);
2201                         /*
2202                          * No need to explicitly check if the path is disabled.
2203                          * Since we are checking for state == ONLINE and the
2204                          * same variable is used for DISABLE/ENABLE information.
2205                          */
2206                         if ((MDI_PI(pip)->pi_state  ==
2207                                 MDI_PATHINFO_STATE_ONLINE) &&
2208                                 preferred == MDI_PI(pip)->pi_preferred) {
2209                                 /*
2210                                  * Return the path in hold state. Caller should
2211                                  * release the lock by calling mdi_rele_path()
2212                                  */
2213                                 MDI_PI_HOLD(pip);
2214                                 MDI_PI_UNLOCK(pip);
2215                                 ct->ct_path_last = pip;
2216                                 *ret_pip = pip;
2217                                 MDI_CLIENT_UNLOCK(ct);
2218                                 return (MDI_SUCCESS);
2219                         }
2220 
2221                         /*
2222                          * Path is busy.
2223                          */
2224                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2225                             MDI_PI_IS_TRANSIENT(pip))
2226                                 retry = 1;
2227                         /*
2228                          * Keep looking for a next available online path
2229                          */
2230                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2231                         if (next == NULL) {
2232                                 next = head;
2233                         }
2234                         MDI_PI_UNLOCK(pip);
2235                         pip = next;
2236                         if (start == pip && preferred) {
2237                                 preferred = 0;
2238                         } else if (start == pip && !preferred) {
2239                                 cont = 0;
2240                         }
2241                 } while (cont);
2242                 break;
2243 
2244         case LOAD_BALANCE_LBA:
2245                 /*
2246                  * Make sure we are looking
2247                  * for an online path. Otherwise, if it is for a STANDBY
2248                  * path request, it will go through and fetch an ONLINE
2249                  * path which is not desirable.
2250                  */
2251                 if ((ct->ct_lb_args != NULL) &&
2252                             (ct->ct_lb_args->region_size) && bp &&
2253                                 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2254                         if (i_mdi_lba_lb(ct, ret_pip, bp)
2255                                     == MDI_SUCCESS) {
2256                                 MDI_CLIENT_UNLOCK(ct);
2257                                 return (MDI_SUCCESS);
2258                         }
2259                 }
2260                 /* FALLTHROUGH */
2261         case LOAD_BALANCE_RR:
2262                 /*
2263                  * Load balancing is Round Robin. Start looking for a online
2264                  * mdi_pathinfo node starting from last known selected path
2265                  * as the start point.  If override flags are specified,
2266                  * process accordingly.
2267                  * If the search is already in effect(start_pip not null),
2268                  * then lets just use the same path preference to continue the
2269                  * traversal.
2270                  */
2271 
2272                 if (start_pip != NULL) {
2273                         preferred = MDI_PI(start_pip)->pi_preferred;
2274                 } else {
2275                         preferred = 1;
2276                 }
2277 
2278                 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2279                 if (start == NULL) {
2280                         pip = head;
2281                 } else {
2282                         pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2283                         if (pip == NULL) {
2284                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2285                                         /*
2286                                          * Return since we hit the end of list
2287                                          */
2288                                         MDI_CLIENT_UNLOCK(ct);
2289                                         return (MDI_NOPATH);
2290                                 }
2291 
2292                                 if (!sb) {
2293                                         if (preferred == 0) {
2294                                                 /*
2295                                                  * Looks like we have completed
2296                                                  * the traversal as preferred
2297                                                  * value is 0. Time to bail out.
2298                                                  */
2299                                                 *ret_pip = NULL;
2300                                                 MDI_CLIENT_UNLOCK(ct);
2301                                                 return (MDI_NOPATH);
2302                                         } else {
2303                                                 /*
2304                                                  * Looks like we reached the
2305                                                  * end of the list. Lets enable
2306                                                  * traversal of non preferred
2307                                                  * paths.
2308                                                  */
2309                                                 preferred = 0;
2310                                         }
2311                                 }
2312                                 pip = head;
2313                         }
2314                 }
2315                 start = pip;
2316                 do {
2317                         MDI_PI_LOCK(pip);
2318                         if (sb) {
2319                                 cond = ((MDI_PI(pip)->pi_state ==
2320                                     MDI_PATHINFO_STATE_ONLINE &&
2321                                         MDI_PI(pip)->pi_preferred ==
2322                                                 preferred) ? 1 : 0);
2323                         } else {
2324                                 if (flags == MDI_SELECT_ONLINE_PATH) {
2325                                         cond = ((MDI_PI(pip)->pi_state ==
2326                                             MDI_PATHINFO_STATE_ONLINE &&
2327                                                 MDI_PI(pip)->pi_preferred ==
2328                                                 preferred) ? 1 : 0);
2329                                 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2330                                         cond = ((MDI_PI(pip)->pi_state ==
2331                                             MDI_PATHINFO_STATE_STANDBY &&
2332                                                 MDI_PI(pip)->pi_preferred ==
2333                                                 preferred) ? 1 : 0);
2334                                 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2335                                     MDI_SELECT_STANDBY_PATH)) {
2336                                         cond = (((MDI_PI(pip)->pi_state ==
2337                                             MDI_PATHINFO_STATE_ONLINE ||
2338                                             (MDI_PI(pip)->pi_state ==
2339                                             MDI_PATHINFO_STATE_STANDBY)) &&
2340                                                 MDI_PI(pip)->pi_preferred ==
2341                                                 preferred) ? 1 : 0);
2342                                 } else if (flags ==
2343                                         (MDI_SELECT_STANDBY_PATH |
2344                                         MDI_SELECT_ONLINE_PATH |
2345                                         MDI_SELECT_USER_DISABLE_PATH)) {
2346                                         cond = (((MDI_PI(pip)->pi_state ==
2347                                             MDI_PATHINFO_STATE_ONLINE ||
2348                                             (MDI_PI(pip)->pi_state ==
2349                                             MDI_PATHINFO_STATE_STANDBY) ||
2350                                                 (MDI_PI(pip)->pi_state ==
2351                                             (MDI_PATHINFO_STATE_ONLINE|
2352                                             MDI_PATHINFO_STATE_USER_DISABLE)) ||
2353                                                 (MDI_PI(pip)->pi_state ==
2354                                             (MDI_PATHINFO_STATE_STANDBY |
2355                                             MDI_PATHINFO_STATE_USER_DISABLE)))&&
2356                                                 MDI_PI(pip)->pi_preferred ==
2357                                                 preferred) ? 1 : 0);
2358                                 } else if (flags ==
2359                                     (MDI_SELECT_STANDBY_PATH |
2360                                     MDI_SELECT_ONLINE_PATH |
2361                                     MDI_SELECT_NO_PREFERRED)) {
2362                                         cond = (((MDI_PI(pip)->pi_state ==
2363                                             MDI_PATHINFO_STATE_ONLINE) ||
2364                                             (MDI_PI(pip)->pi_state ==
2365                                             MDI_PATHINFO_STATE_STANDBY))
2366                                             ? 1 : 0);
2367                                 } else {
2368                                         cond = 0;
2369                                 }
2370                         }
2371                         /*
2372                          * No need to explicitly check if the path is disabled.
2373                          * Since we are checking for state == ONLINE and the
2374                          * same variable is used for DISABLE/ENABLE information.
2375                          */
2376                         if (cond) {
2377                                 /*
2378                                  * Return the path in hold state. Caller should
2379                                  * release the lock by calling mdi_rele_path()
2380                                  */
2381                                 MDI_PI_HOLD(pip);
2382                                 MDI_PI_UNLOCK(pip);
2383                                 if (sb)
2384                                         ct->ct_path_last = pip;
2385                                 *ret_pip = pip;
2386                                 MDI_CLIENT_UNLOCK(ct);
2387                                 return (MDI_SUCCESS);
2388                         }
2389                         /*
2390                          * Path is busy.
2391                          */
2392                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2393                             MDI_PI_IS_TRANSIENT(pip))
2394                                 retry = 1;
2395 
2396                         /*
2397                          * Keep looking for a next available online path
2398                          */
2399 do_again:
2400                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2401                         if (next == NULL) {
2402                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2403                                         /*
2404                                          * Bail out since we hit the end of list
2405                                          */
2406                                         MDI_PI_UNLOCK(pip);
2407                                         break;
2408                                 }
2409 
2410                                 if (!sb) {
2411                                         if (preferred == 1) {
2412                                                 /*
2413                                                  * Looks like we reached the
2414                                                  * end of the list. Lets enable
2415                                                  * traversal of non preferred
2416                                                  * paths.
2417                                                  */
2418                                                 preferred = 0;
2419                                                 next = head;
2420                                         } else {
2421                                                 /*
2422                                                  * We have done both the passes
2423                                                  * Preferred as well as for
2424                                                  * Non-preferred. Bail out now.
2425                                                  */
2426                                                 cont = 0;
2427                                         }
2428                                 } else {
2429                                         /*
2430                                          * Standard behavior case.
2431                                          */
2432                                         next = head;
2433                                 }
2434                         }
2435                         MDI_PI_UNLOCK(pip);
2436                         if (cont == 0) {
2437                                 break;
2438                         }
2439                         pip = next;
2440 
2441                         if (!sb) {
2442                                 /*
2443                                  * We need to handle the selection of
2444                                  * non-preferred path in the following
2445                                  * case:
2446                                  *
2447                                  * +------+   +------+   +------+   +-----+
2448                                  * | A : 1| - | B : 1| - | C : 0| - |NULL |
2449                                  * +------+   +------+   +------+   +-----+
2450                                  *
2451                                  * If we start the search with B, we need to
2452                                  * skip beyond B to pick C which is non -
2453                                  * preferred in the second pass. The following
2454                                  * test, if true, will allow us to skip over
2455                                  * the 'start'(B in the example) to select
2456                                  * other non preferred elements.
2457                                  */
2458                                 if ((start_pip != NULL) && (start_pip == pip) &&
2459                                     (MDI_PI(start_pip)->pi_preferred
2460                                     != preferred)) {
2461                                         /*
2462                                          * try again after going past the start
2463                                          * pip
2464                                          */
2465                                         MDI_PI_LOCK(pip);
2466                                         goto do_again;
2467                                 }
2468                         } else {
2469                                 /*
2470                                  * Standard behavior case
2471                                  */
2472                                 if (start == pip && preferred) {
2473                                         /* look for nonpreferred paths */
2474                                         preferred = 0;
2475                                 } else if (start == pip && !preferred) {
2476                                         /*
2477                                          * Exit condition
2478                                          */
2479                                         cont = 0;
2480                                 }
2481                         }
2482                 } while (cont);
2483                 break;
2484         }
2485 
2486         MDI_CLIENT_UNLOCK(ct);
2487         if (retry == 1) {
2488                 return (MDI_BUSY);
2489         } else {
2490                 return (MDI_NOPATH);
2491         }
2492 }
2493 
2494 /*
2495  * For a client, return the next available path to any phci
2496  *
2497  * Note:
2498  *              Caller should hold the branch's devinfo node to get a consistent
2499  *              snap shot of the mdi_pathinfo nodes.
2500  *
2501  *              Please note that even the list is stable the mdi_pathinfo
2502  *              node state and properties are volatile.  The caller should lock
2503  *              and unlock the nodes by calling mdi_pi_lock() and
2504  *              mdi_pi_unlock() functions to get a stable properties.
2505  *
2506  *              If there is a need to use the nodes beyond the hold of the
2507  *              devinfo node period (For ex. I/O), then mdi_pathinfo node
2508  *              need to be held against unexpected removal by calling
2509  *              mdi_hold_path() and should be released by calling
2510  *              mdi_rele_path() on completion.
2511  */
2512 mdi_pathinfo_t *
2513 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2514 {
2515         mdi_client_t *ct;
2516 
2517         if (!MDI_CLIENT(ct_dip))
2518                 return (NULL);
2519 
2520         /*
2521          * Walk through client link
2522          */
2523         ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2524         ASSERT(ct != NULL);
2525 
2526         if (pip == NULL)
2527                 return ((mdi_pathinfo_t *)ct->ct_path_head);
2528 
2529         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2530 }
2531 
2532 /*
2533  * For a phci, return the next available path to any client
2534  * Note: ditto mdi_get_next_phci_path()
2535  */
2536 mdi_pathinfo_t *
2537 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2538 {
2539         mdi_phci_t *ph;
2540 
2541         if (!MDI_PHCI(ph_dip))
2542                 return (NULL);
2543 
2544         /*
2545          * Walk through pHCI link
2546          */
2547         ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2548         ASSERT(ph != NULL);
2549 
2550         if (pip == NULL)
2551                 return ((mdi_pathinfo_t *)ph->ph_path_head);
2552 
2553         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2554 }
2555 
2556 /*
2557  * mdi_hold_path():
2558  *              Hold the mdi_pathinfo node against unwanted unexpected free.
2559  * Return Values:
2560  *              None
2561  */
2562 void
2563 mdi_hold_path(mdi_pathinfo_t *pip)
2564 {
2565         if (pip) {
2566                 MDI_PI_LOCK(pip);
2567                 MDI_PI_HOLD(pip);
2568                 MDI_PI_UNLOCK(pip);
2569         }
2570 }
2571 
2572 
2573 /*
2574  * mdi_rele_path():
2575  *              Release the mdi_pathinfo node which was selected
2576  *              through mdi_select_path() mechanism or manually held by
2577  *              calling mdi_hold_path().
2578  * Return Values:
2579  *              None
2580  */
2581 void
2582 mdi_rele_path(mdi_pathinfo_t *pip)
2583 {
2584         if (pip) {
2585                 MDI_PI_LOCK(pip);
2586                 MDI_PI_RELE(pip);
2587                 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2588                         cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2589                 }
2590                 MDI_PI_UNLOCK(pip);
2591         }
2592 }
2593 
2594 /*
2595  * mdi_pi_lock():
2596  *              Lock the mdi_pathinfo node.
2597  * Note:
2598  *              The caller should release the lock by calling mdi_pi_unlock()
2599  */
2600 void
2601 mdi_pi_lock(mdi_pathinfo_t *pip)
2602 {
2603         ASSERT(pip != NULL);
2604         if (pip) {
2605                 MDI_PI_LOCK(pip);
2606         }
2607 }
2608 
2609 
2610 /*
2611  * mdi_pi_unlock():
2612  *              Unlock the mdi_pathinfo node.
2613  * Note:
2614  *              The mdi_pathinfo node should have been locked with mdi_pi_lock()
2615  */
2616 void
2617 mdi_pi_unlock(mdi_pathinfo_t *pip)
2618 {
2619         ASSERT(pip != NULL);
2620         if (pip) {
2621                 MDI_PI_UNLOCK(pip);
2622         }
2623 }
2624 
2625 /*
2626  * mdi_pi_find():
2627  *              Search the list of mdi_pathinfo nodes attached to the
2628  *              pHCI/Client device node whose path address matches "paddr".
2629  *              Returns a pointer to the mdi_pathinfo node if a matching node is
2630  *              found.
2631  * Return Values:
2632  *              mdi_pathinfo node handle
2633  *              NULL
2634  * Notes:
2635  *              Caller need not hold any locks to call this function.
2636  */
2637 mdi_pathinfo_t *
2638 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2639 {
2640         mdi_phci_t              *ph;
2641         mdi_vhci_t              *vh;
2642         mdi_client_t            *ct;
2643         mdi_pathinfo_t          *pip = NULL;
2644 
2645         MDI_DEBUG(2, (MDI_NOTE, pdip,
2646             "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2647         if ((pdip == NULL) || (paddr == NULL)) {
2648                 return (NULL);
2649         }
2650         ph = i_devi_get_phci(pdip);
2651         if (ph == NULL) {
2652                 /*
2653                  * Invalid pHCI device, Nothing more to do.
2654                  */
2655                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2656                 return (NULL);
2657         }
2658 
2659         vh = ph->ph_vhci;
2660         if (vh == NULL) {
2661                 /*
2662                  * Invalid vHCI device, Nothing more to do.
2663                  */
2664                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2665                 return (NULL);
2666         }
2667 
2668         /*
2669          * Look for pathinfo node identified by paddr.
2670          */
2671         if (caddr == NULL) {
2672                 /*
2673                  * Find a mdi_pathinfo node under pHCI list for a matching
2674                  * unit address.
2675                  */
2676                 MDI_PHCI_LOCK(ph);
2677                 if (MDI_PHCI_IS_OFFLINE(ph)) {
2678                         MDI_DEBUG(2, (MDI_WARN, pdip,
2679                             "offline phci %p", (void *)ph));
2680                         MDI_PHCI_UNLOCK(ph);
2681                         return (NULL);
2682                 }
2683                 pip = (mdi_pathinfo_t *)ph->ph_path_head;
2684 
2685                 while (pip != NULL) {
2686                         if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2687                                 break;
2688                         }
2689                         pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2690                 }
2691                 MDI_PHCI_UNLOCK(ph);
2692                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2693                     "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2694                 return (pip);
2695         }
2696 
2697         /*
2698          * XXX - Is the rest of the code in this function really necessary?
2699          * The consumers of mdi_pi_find() can search for the desired pathinfo
2700          * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2701          * whether the search is based on the pathinfo nodes attached to
2702          * the pHCI or the client node, the result will be the same.
2703          */
2704 
2705         /*
2706          * Find the client device corresponding to 'caddr'
2707          */
2708         MDI_VHCI_CLIENT_LOCK(vh);
2709 
2710         /*
2711          * XXX - Passing NULL to the following function works as long as the
2712          * the client addresses (caddr) are unique per vhci basis.
2713          */
2714         ct = i_mdi_client_find(vh, NULL, caddr);
2715         if (ct == NULL) {
2716                 /*
2717                  * Client not found, Obviously mdi_pathinfo node has not been
2718                  * created yet.
2719                  */
2720                 MDI_VHCI_CLIENT_UNLOCK(vh);
2721                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2722                     "client not found for caddr @%s", caddr ? caddr : ""));
2723                 return (NULL);
2724         }
2725 
2726         /*
2727          * Hold the client lock and look for a mdi_pathinfo node with matching
2728          * pHCI and paddr
2729          */
2730         MDI_CLIENT_LOCK(ct);
2731 
2732         /*
2733          * Release the global mutex as it is no more needed. Note: We always
2734          * respect the locking order while acquiring.
2735          */
2736         MDI_VHCI_CLIENT_UNLOCK(vh);
2737 
2738         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2739         while (pip != NULL) {
2740                 /*
2741                  * Compare the unit address
2742                  */
2743                 if ((MDI_PI(pip)->pi_phci == ph) &&
2744                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2745                         break;
2746                 }
2747                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2748         }
2749         MDI_CLIENT_UNLOCK(ct);
2750         MDI_DEBUG(2, (MDI_NOTE, pdip,
2751             "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2752         return (pip);
2753 }
2754 
2755 /*
2756  * mdi_pi_alloc():
2757  *              Allocate and initialize a new instance of a mdi_pathinfo node.
2758  *              The mdi_pathinfo node returned by this function identifies a
2759  *              unique device path is capable of having properties attached
2760  *              and passed to mdi_pi_online() to fully attach and online the
2761  *              path and client device node.
2762  *              The mdi_pathinfo node returned by this function must be
2763  *              destroyed using mdi_pi_free() if the path is no longer
2764  *              operational or if the caller fails to attach a client device
2765  *              node when calling mdi_pi_online(). The framework will not free
2766  *              the resources allocated.
2767  *              This function can be called from both interrupt and kernel
2768  *              contexts.  DDI_NOSLEEP flag should be used while calling
2769  *              from interrupt contexts.
2770  * Return Values:
2771  *              MDI_SUCCESS
2772  *              MDI_FAILURE
2773  *              MDI_NOMEM
2774  */
2775 /*ARGSUSED*/
2776 int
2777 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2778     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2779 {
2780         mdi_vhci_t      *vh;
2781         mdi_phci_t      *ph;
2782         mdi_client_t    *ct;
2783         mdi_pathinfo_t  *pip = NULL;
2784         dev_info_t      *cdip;
2785         int             rv = MDI_NOMEM;
2786         int             path_allocated = 0;
2787 
2788         MDI_DEBUG(2, (MDI_NOTE, pdip,
2789             "cname %s: caddr@%s paddr@%s",
2790             cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2791 
2792         if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2793             ret_pip == NULL) {
2794                 /* Nothing more to do */
2795                 return (MDI_FAILURE);
2796         }
2797 
2798         *ret_pip = NULL;
2799 
2800         /* No allocations on detaching pHCI */
2801         if (DEVI_IS_DETACHING(pdip)) {
2802                 /* Invalid pHCI device, return failure */
2803                 MDI_DEBUG(1, (MDI_WARN, pdip,
2804                     "!detaching pHCI=%p", (void *)pdip));
2805                 return (MDI_FAILURE);
2806         }
2807 
2808         ph = i_devi_get_phci(pdip);
2809         ASSERT(ph != NULL);
2810         if (ph == NULL) {
2811                 /* Invalid pHCI device, return failure */
2812                 MDI_DEBUG(1, (MDI_WARN, pdip,
2813                     "!invalid pHCI=%p", (void *)pdip));
2814                 return (MDI_FAILURE);
2815         }
2816 
2817         MDI_PHCI_LOCK(ph);
2818         vh = ph->ph_vhci;
2819         if (vh == NULL) {
2820                 /* Invalid vHCI device, return failure */
2821                 MDI_DEBUG(1, (MDI_WARN, pdip,
2822                     "!invalid vHCI=%p", (void *)pdip));
2823                 MDI_PHCI_UNLOCK(ph);
2824                 return (MDI_FAILURE);
2825         }
2826 
2827         if (MDI_PHCI_IS_READY(ph) == 0) {
2828                 /*
2829                  * Do not allow new node creation when pHCI is in
2830                  * offline/suspended states
2831                  */
2832                 MDI_DEBUG(1, (MDI_WARN, pdip,
2833                     "pHCI=%p is not ready", (void *)ph));
2834                 MDI_PHCI_UNLOCK(ph);
2835                 return (MDI_BUSY);
2836         }
2837         MDI_PHCI_UNSTABLE(ph);
2838         MDI_PHCI_UNLOCK(ph);
2839 
2840         /* look for a matching client, create one if not found */
2841         MDI_VHCI_CLIENT_LOCK(vh);
2842         ct = i_mdi_client_find(vh, cname, caddr);
2843         if (ct == NULL) {
2844                 ct = i_mdi_client_alloc(vh, cname, caddr);
2845                 ASSERT(ct != NULL);
2846         }
2847 
2848         if (ct->ct_dip == NULL) {
2849                 /*
2850                  * Allocate a devinfo node
2851                  */
2852                 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2853                     compatible, ncompatible);
2854                 if (ct->ct_dip == NULL) {
2855                         (void) i_mdi_client_free(vh, ct);
2856                         goto fail;
2857                 }
2858         }
2859         cdip = ct->ct_dip;
2860 
2861         DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2862         DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2863 
2864         MDI_CLIENT_LOCK(ct);
2865         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2866         while (pip != NULL) {
2867                 /*
2868                  * Compare the unit address
2869                  */
2870                 if ((MDI_PI(pip)->pi_phci == ph) &&
2871                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2872                         break;
2873                 }
2874                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2875         }
2876         MDI_CLIENT_UNLOCK(ct);
2877 
2878         if (pip == NULL) {
2879                 /*
2880                  * This is a new path for this client device.  Allocate and
2881                  * initialize a new pathinfo node
2882                  */
2883                 pip = i_mdi_pi_alloc(ph, paddr, ct);
2884                 ASSERT(pip != NULL);
2885                 path_allocated = 1;
2886         }
2887         rv = MDI_SUCCESS;
2888 
2889 fail:
2890         /*
2891          * Release the global mutex.
2892          */
2893         MDI_VHCI_CLIENT_UNLOCK(vh);
2894 
2895         /*
2896          * Mark the pHCI as stable
2897          */
2898         MDI_PHCI_LOCK(ph);
2899         MDI_PHCI_STABLE(ph);
2900         MDI_PHCI_UNLOCK(ph);
2901         *ret_pip = pip;
2902 
2903         MDI_DEBUG(2, (MDI_NOTE, pdip,
2904             "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2905 
2906         if (path_allocated)
2907                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2908 
2909         return (rv);
2910 }
2911 
2912 /*ARGSUSED*/
2913 int
2914 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2915     int flags, mdi_pathinfo_t **ret_pip)
2916 {
2917         return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2918             flags, ret_pip));
2919 }
2920 
2921 /*
2922  * i_mdi_pi_alloc():
2923  *              Allocate a mdi_pathinfo node and add to the pHCI path list
2924  * Return Values:
2925  *              mdi_pathinfo
2926  */
2927 /*ARGSUSED*/
2928 static mdi_pathinfo_t *
2929 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2930 {
2931         mdi_pathinfo_t  *pip;
2932         int             ct_circular;
2933         int             ph_circular;
2934         static char     path[MAXPATHLEN];       /* mdi_pathmap_mutex protects */
2935         char            *path_persistent;
2936         int             path_instance;
2937         mod_hash_val_t  hv;
2938 
2939         ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2940 
2941         pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2942         mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2943         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2944             MDI_PATHINFO_STATE_TRANSIENT;
2945 
2946         if (MDI_PHCI_IS_USER_DISABLED(ph))
2947                 MDI_PI_SET_USER_DISABLE(pip);
2948 
2949         if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2950                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2951 
2952         if (MDI_PHCI_IS_DRV_DISABLED(ph))
2953                 MDI_PI_SET_DRV_DISABLE(pip);
2954 
2955         MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2956         cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2957         MDI_PI(pip)->pi_client = ct;
2958         MDI_PI(pip)->pi_phci = ph;
2959         MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2960         (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2961 
2962         /*
2963          * We form the "path" to the pathinfo node, and see if we have
2964          * already allocated a 'path_instance' for that "path".  If so,
2965          * we use the already allocated 'path_instance'.  If not, we
2966          * allocate a new 'path_instance' and associate it with a copy of
2967          * the "path" string (which is never freed). The association
2968          * between a 'path_instance' this "path" string persists until
2969          * reboot.
2970          */
2971         mutex_enter(&mdi_pathmap_mutex);
2972         (void) ddi_pathname(ph->ph_dip, path);
2973         (void) sprintf(path + strlen(path), "/%s@%s",
2974             mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2975         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2976                 path_instance = (uint_t)(intptr_t)hv;
2977         } else {
2978                 /* allocate a new 'path_instance' and persistent "path" */
2979                 path_instance = mdi_pathmap_instance++;
2980                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2981                 (void) mod_hash_insert(mdi_pathmap_bypath,
2982                     (mod_hash_key_t)path_persistent,
2983                     (mod_hash_val_t)(intptr_t)path_instance);
2984                 (void) mod_hash_insert(mdi_pathmap_byinstance,
2985                     (mod_hash_key_t)(intptr_t)path_instance,
2986                     (mod_hash_val_t)path_persistent);
2987 
2988                 /* create shortpath name */
2989                 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2990                     ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2991                     mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2992                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2993                 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2994                     (mod_hash_key_t)(intptr_t)path_instance,
2995                     (mod_hash_val_t)path_persistent);
2996         }
2997         mutex_exit(&mdi_pathmap_mutex);
2998         MDI_PI(pip)->pi_path_instance = path_instance;
2999 
3000         (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3001         ASSERT(MDI_PI(pip)->pi_prop != NULL);
3002         MDI_PI(pip)->pi_pprivate = NULL;
3003         MDI_PI(pip)->pi_cprivate = NULL;
3004         MDI_PI(pip)->pi_vprivate = NULL;
3005         MDI_PI(pip)->pi_client_link = NULL;
3006         MDI_PI(pip)->pi_phci_link = NULL;
3007         MDI_PI(pip)->pi_ref_cnt = 0;
3008         MDI_PI(pip)->pi_kstats = NULL;
3009         MDI_PI(pip)->pi_preferred = 1;
3010         cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3011 
3012         /*
3013          * Lock both dev_info nodes against changes in parallel.
3014          *
3015          * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3016          * This atypical operation is done to synchronize pathinfo nodes
3017          * during devinfo snapshot (see di_register_pip) by 'pretending' that
3018          * the pathinfo nodes are children of the Client.
3019          */
3020         ndi_devi_enter(ct->ct_dip, &ct_circular);
3021         ndi_devi_enter(ph->ph_dip, &ph_circular);
3022 
3023         i_mdi_phci_add_path(ph, pip);
3024         i_mdi_client_add_path(ct, pip);
3025 
3026         ndi_devi_exit(ph->ph_dip, ph_circular);
3027         ndi_devi_exit(ct->ct_dip, ct_circular);
3028 
3029         return (pip);
3030 }
3031 
3032 /*
3033  * mdi_pi_pathname_by_instance():
3034  *      Lookup of "path" by 'path_instance'. Return "path".
3035  *      NOTE: returned "path" remains valid forever (until reboot).
3036  */
3037 char *
3038 mdi_pi_pathname_by_instance(int path_instance)
3039 {
3040         char            *path;
3041         mod_hash_val_t  hv;
3042 
3043         /* mdi_pathmap lookup of "path" by 'path_instance' */
3044         mutex_enter(&mdi_pathmap_mutex);
3045         if (mod_hash_find(mdi_pathmap_byinstance,
3046             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3047                 path = (char *)hv;
3048         else
3049                 path = NULL;
3050         mutex_exit(&mdi_pathmap_mutex);
3051         return (path);
3052 }
3053 
3054 /*
3055  * mdi_pi_spathname_by_instance():
3056  *      Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3057  *      NOTE: returned "shortpath" remains valid forever (until reboot).
3058  */
3059 char *
3060 mdi_pi_spathname_by_instance(int path_instance)
3061 {
3062         char            *path;
3063         mod_hash_val_t  hv;
3064 
3065         /* mdi_pathmap lookup of "path" by 'path_instance' */
3066         mutex_enter(&mdi_pathmap_mutex);
3067         if (mod_hash_find(mdi_pathmap_sbyinstance,
3068             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3069                 path = (char *)hv;
3070         else
3071                 path = NULL;
3072         mutex_exit(&mdi_pathmap_mutex);
3073         return (path);
3074 }
3075 
3076 
3077 /*
3078  * i_mdi_phci_add_path():
3079  *              Add a mdi_pathinfo node to pHCI list.
3080  * Notes:
3081  *              Caller should per-pHCI mutex
3082  */
3083 static void
3084 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3085 {
3086         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3087 
3088         MDI_PHCI_LOCK(ph);
3089         if (ph->ph_path_head == NULL) {
3090                 ph->ph_path_head = pip;
3091         } else {
3092                 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3093         }
3094         ph->ph_path_tail = pip;
3095         ph->ph_path_count++;
3096         MDI_PHCI_UNLOCK(ph);
3097 }
3098 
3099 /*
3100  * i_mdi_client_add_path():
3101  *              Add mdi_pathinfo node to client list
3102  */
3103 static void
3104 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3105 {
3106         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3107 
3108         MDI_CLIENT_LOCK(ct);
3109         if (ct->ct_path_head == NULL) {
3110                 ct->ct_path_head = pip;
3111         } else {
3112                 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3113         }
3114         ct->ct_path_tail = pip;
3115         ct->ct_path_count++;
3116         MDI_CLIENT_UNLOCK(ct);
3117 }
3118 
3119 /*
3120  * mdi_pi_free():
3121  *              Free the mdi_pathinfo node and also client device node if this
3122  *              is the last path to the device
3123  * Return Values:
3124  *              MDI_SUCCESS
3125  *              MDI_FAILURE
3126  *              MDI_BUSY
3127  */
3128 /*ARGSUSED*/
3129 int
3130 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3131 {
3132         int             rv;
3133         mdi_vhci_t      *vh;
3134         mdi_phci_t      *ph;
3135         mdi_client_t    *ct;
3136         int             (*f)();
3137         int             client_held = 0;
3138 
3139         MDI_PI_LOCK(pip);
3140         ph = MDI_PI(pip)->pi_phci;
3141         ASSERT(ph != NULL);
3142         if (ph == NULL) {
3143                 /*
3144                  * Invalid pHCI device, return failure
3145                  */
3146                 MDI_DEBUG(1, (MDI_WARN, NULL,
3147                     "!invalid pHCI: pip %s %p",
3148                     mdi_pi_spathname(pip), (void *)pip));
3149                 MDI_PI_UNLOCK(pip);
3150                 return (MDI_FAILURE);
3151         }
3152 
3153         vh = ph->ph_vhci;
3154         ASSERT(vh != NULL);
3155         if (vh == NULL) {
3156                 /* Invalid pHCI device, return failure */
3157                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3158                     "!invalid vHCI: pip %s %p",
3159                     mdi_pi_spathname(pip), (void *)pip));
3160                 MDI_PI_UNLOCK(pip);
3161                 return (MDI_FAILURE);
3162         }
3163 
3164         ct = MDI_PI(pip)->pi_client;
3165         ASSERT(ct != NULL);
3166         if (ct == NULL) {
3167                 /*
3168                  * Invalid Client device, return failure
3169                  */
3170                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3171                     "!invalid client: pip %s %p",
3172                     mdi_pi_spathname(pip), (void *)pip));
3173                 MDI_PI_UNLOCK(pip);
3174                 return (MDI_FAILURE);
3175         }
3176 
3177         /*
3178          * Check to see for busy condition.  A mdi_pathinfo can only be freed
3179          * if the node state is either offline or init and the reference count
3180          * is zero.
3181          */
3182         if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3183             MDI_PI_IS_INITING(pip))) {
3184                 /*
3185                  * Node is busy
3186                  */
3187                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3188                     "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3189                 MDI_PI_UNLOCK(pip);
3190                 return (MDI_BUSY);
3191         }
3192 
3193         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3194                 /*
3195                  * Give a chance for pending I/Os to complete.
3196                  */
3197                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3198                     "!%d cmds still pending on path: %s %p",
3199                     MDI_PI(pip)->pi_ref_cnt,
3200                     mdi_pi_spathname(pip), (void *)pip));
3201                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3202                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3203                     TR_CLOCK_TICK) == -1) {
3204                         /*
3205                          * The timeout time reached without ref_cnt being zero
3206                          * being signaled.
3207                          */
3208                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3209                             "!Timeout reached on path %s %p without the cond",
3210                             mdi_pi_spathname(pip), (void *)pip));
3211                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3212                             "!%d cmds still pending on path %s %p",
3213                             MDI_PI(pip)->pi_ref_cnt,
3214                             mdi_pi_spathname(pip), (void *)pip));
3215                         MDI_PI_UNLOCK(pip);
3216                         return (MDI_BUSY);
3217                 }
3218         }
3219         if (MDI_PI(pip)->pi_pm_held) {
3220                 client_held = 1;
3221         }
3222         MDI_PI_UNLOCK(pip);
3223 
3224         vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3225 
3226         MDI_CLIENT_LOCK(ct);
3227 
3228         /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3229         MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3230 
3231         /*
3232          * Wait till failover is complete before removing this node.
3233          */
3234         while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3235                 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3236 
3237         MDI_CLIENT_UNLOCK(ct);
3238         MDI_VHCI_CLIENT_LOCK(vh);
3239         MDI_CLIENT_LOCK(ct);
3240         MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3241 
3242         if (!MDI_PI_IS_INITING(pip)) {
3243                 f = vh->vh_ops->vo_pi_uninit;
3244                 if (f != NULL) {
3245                         rv = (*f)(vh->vh_dip, pip, 0);
3246                 }
3247         } else
3248                 rv = MDI_SUCCESS;
3249 
3250         /*
3251          * If vo_pi_uninit() completed successfully.
3252          */
3253         if (rv == MDI_SUCCESS) {
3254                 if (client_held) {
3255                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3256                             "i_mdi_pm_rele_client\n"));
3257                         i_mdi_pm_rele_client(ct, 1);
3258                 }
3259                 i_mdi_pi_free(ph, pip, ct);
3260                 if (ct->ct_path_count == 0) {
3261                         /*
3262                          * Client lost its last path.
3263                          * Clean up the client device
3264                          */
3265                         MDI_CLIENT_UNLOCK(ct);
3266                         (void) i_mdi_client_free(ct->ct_vhci, ct);
3267                         MDI_VHCI_CLIENT_UNLOCK(vh);
3268                         return (rv);
3269                 }
3270         }
3271         MDI_CLIENT_UNLOCK(ct);
3272         MDI_VHCI_CLIENT_UNLOCK(vh);
3273 
3274         if (rv == MDI_FAILURE)
3275                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3276 
3277         return (rv);
3278 }
3279 
3280 /*
3281  * i_mdi_pi_free():
3282  *              Free the mdi_pathinfo node
3283  */
3284 static void
3285 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3286 {
3287         int     ct_circular;
3288         int     ph_circular;
3289 
3290         ASSERT(MDI_CLIENT_LOCKED(ct));
3291 
3292         /*
3293          * remove any per-path kstats
3294          */
3295         i_mdi_pi_kstat_destroy(pip);
3296 
3297         /* See comments in i_mdi_pi_alloc() */
3298         ndi_devi_enter(ct->ct_dip, &ct_circular);
3299         ndi_devi_enter(ph->ph_dip, &ph_circular);
3300 
3301         i_mdi_client_remove_path(ct, pip);
3302         i_mdi_phci_remove_path(ph, pip);
3303 
3304         ndi_devi_exit(ph->ph_dip, ph_circular);
3305         ndi_devi_exit(ct->ct_dip, ct_circular);
3306 
3307         mutex_destroy(&MDI_PI(pip)->pi_mutex);
3308         cv_destroy(&MDI_PI(pip)->pi_state_cv);
3309         cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3310         if (MDI_PI(pip)->pi_addr) {
3311                 kmem_free(MDI_PI(pip)->pi_addr,
3312                     strlen(MDI_PI(pip)->pi_addr) + 1);
3313                 MDI_PI(pip)->pi_addr = NULL;
3314         }
3315 
3316         if (MDI_PI(pip)->pi_prop) {
3317                 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3318                 MDI_PI(pip)->pi_prop = NULL;
3319         }
3320         kmem_free(pip, sizeof (struct mdi_pathinfo));
3321 }
3322 
3323 
3324 /*
3325  * i_mdi_phci_remove_path():
3326  *              Remove a mdi_pathinfo node from pHCI list.
3327  * Notes:
3328  *              Caller should hold per-pHCI mutex
3329  */
3330 static void
3331 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3332 {
3333         mdi_pathinfo_t  *prev = NULL;
3334         mdi_pathinfo_t  *path = NULL;
3335 
3336         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3337 
3338         MDI_PHCI_LOCK(ph);
3339         path = ph->ph_path_head;
3340         while (path != NULL) {
3341                 if (path == pip) {
3342                         break;
3343                 }
3344                 prev = path;
3345                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3346         }
3347 
3348         if (path) {
3349                 ph->ph_path_count--;
3350                 if (prev) {
3351                         MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3352                 } else {
3353                         ph->ph_path_head =
3354                             (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3355                 }
3356                 if (ph->ph_path_tail == path) {
3357                         ph->ph_path_tail = prev;
3358                 }
3359         }
3360 
3361         /*
3362          * Clear the pHCI link
3363          */
3364         MDI_PI(pip)->pi_phci_link = NULL;
3365         MDI_PI(pip)->pi_phci = NULL;
3366         MDI_PHCI_UNLOCK(ph);
3367 }
3368 
3369 /*
3370  * i_mdi_client_remove_path():
3371  *              Remove a mdi_pathinfo node from client path list.
3372  */
3373 static void
3374 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3375 {
3376         mdi_pathinfo_t  *prev = NULL;
3377         mdi_pathinfo_t  *path;
3378 
3379         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3380 
3381         ASSERT(MDI_CLIENT_LOCKED(ct));
3382         path = ct->ct_path_head;
3383         while (path != NULL) {
3384                 if (path == pip) {
3385                         break;
3386                 }
3387                 prev = path;
3388                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3389         }
3390 
3391         if (path) {
3392                 ct->ct_path_count--;
3393                 if (prev) {
3394                         MDI_PI(prev)->pi_client_link =
3395                             MDI_PI(path)->pi_client_link;
3396                 } else {
3397                         ct->ct_path_head =
3398                             (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3399                 }
3400                 if (ct->ct_path_tail == path) {
3401                         ct->ct_path_tail = prev;
3402                 }
3403                 if (ct->ct_path_last == path) {
3404                         ct->ct_path_last = ct->ct_path_head;
3405                 }
3406         }
3407         MDI_PI(pip)->pi_client_link = NULL;
3408         MDI_PI(pip)->pi_client = NULL;
3409 }
3410 
3411 /*
3412  * i_mdi_pi_state_change():
3413  *              Move a mdi_pathinfo node to the requested state (online,
      *              standby, fault or offline).  The path is first marked
      *              with the matching transient state, the vHCI driver's
      *              vo_pi_state_change() entry point is then called, and
      *              finally the client state and the client dev_info node
      *              are updated to reflect the result.
3414  *
3415  * Return Values:
3416  *              MDI_SUCCESS
3417  *              MDI_FAILURE
      *              MDI_BUSY        pHCI not ready, or a dev_info
      *                              online/offline request returned NDI_BUSY
      *              Any other non-success value returned by the vHCI's
      *              vo_pi_state_change() callback is passed back unchanged.
3418  */
3419 /*ARGSUSED*/
3420 static int
3421 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3422 {
3423         int             rv = MDI_SUCCESS;
3424         mdi_vhci_t      *vh;
3425         mdi_phci_t      *ph;
3426         mdi_client_t    *ct;
3427         int             (*f)();
3428         dev_info_t      *cdip;
3429 
3430         MDI_PI_LOCK(pip);
3431 
3432         ph = MDI_PI(pip)->pi_phci;
3433         ASSERT(ph);
3434         if (ph == NULL) {
3435                 /*
3436                  * Invalid pHCI device, fail the request
3437                  */
3438                 MDI_PI_UNLOCK(pip);
3439                 MDI_DEBUG(1, (MDI_WARN, NULL,
3440                     "!invalid phci: pip %s %p",
3441                     mdi_pi_spathname(pip), (void *)pip));
3442                 return (MDI_FAILURE);
3443         }
3444 
3445         vh = ph->ph_vhci;
3446         ASSERT(vh);
3447         if (vh == NULL) {
3448                 /*
3449                  * Invalid vHCI device, fail the request
3450                  */
3451                 MDI_PI_UNLOCK(pip);
3452                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3453                     "!invalid vhci: pip %s %p",
3454                     mdi_pi_spathname(pip), (void *)pip));
3455                 return (MDI_FAILURE);
3456         }
3457 
3458         ct = MDI_PI(pip)->pi_client;
3459         ASSERT(ct != NULL);
3460         if (ct == NULL) {
3461                 /*
3462                  * Invalid client device, fail the request
3463                  */
3464                 MDI_PI_UNLOCK(pip);
3465                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3466                     "!invalid client: pip %s %p",
3467                     mdi_pi_spathname(pip), (void *)pip));
3468                 return (MDI_FAILURE);
3469         }
3470 
3471         /*
3472          * If this path has not been initialized yet, Callback vHCI driver's
3473          * pathinfo node initialize entry point
3474          */
3475 
3476         if (MDI_PI_IS_INITING(pip)) {
                     /* The pip lock is dropped across the vHCI callback. */
3477                 MDI_PI_UNLOCK(pip);
3478                 f = vh->vh_ops->vo_pi_init;
3479                 if (f != NULL) {
3480                         rv = (*f)(vh->vh_dip, pip, 0);
3481                         if (rv != MDI_SUCCESS) {
3482                                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3483                                     "!vo_pi_init failed: vHCI %p, pip %s %p",
3484                                     (void *)vh, mdi_pi_spathname(pip),
3485                                     (void *)pip));
                                     /* No lock is held at this point. */
3486                                 return (MDI_FAILURE);
3487                         }
3488                 }
3489                 MDI_PI_LOCK(pip);
3490                 MDI_PI_CLEAR_TRANSIENT(pip);
3491         }
3492 
3493         /*
3494          * Do not allow state transition when pHCI is in offline/suspended
3495          * states
3496          */
             /*
              * NOTE(review): the pip lock is already held here, which is the
              * reverse of the default ph_mutex -> pi_mutex order; presumably
              * i_mdi_phci_lock() copes with that -- confirm.
              */
3497         i_mdi_phci_lock(ph, pip);
3498         if (MDI_PHCI_IS_READY(ph) == 0) {
3499                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3500                     "!pHCI not ready, pHCI=%p", (void *)ph));
3501                 MDI_PI_UNLOCK(pip);
3502                 i_mdi_phci_unlock(ph);
3503                 return (MDI_BUSY);
3504         }
             /* Undone by MDI_PHCI_STABLE() at state_change_exit. */
3505         MDI_PHCI_UNSTABLE(ph);
3506         i_mdi_phci_unlock(ph);
3507 
3508         /*
3509          * Check if mdi_pathinfo state is in transient state.
3510          * If yes, offlining is in progress and wait till transient state is
3511          * cleared.
3512          */
             /* The outer test is redundant with the loop condition. */
3513         if (MDI_PI_IS_TRANSIENT(pip)) {
3514                 while (MDI_PI_IS_TRANSIENT(pip)) {
3515                         cv_wait(&MDI_PI(pip)->pi_state_cv,
3516                             &MDI_PI(pip)->pi_mutex);
3517                 }
3518         }
3519 
3520         /*
3521          * Grab the client lock in reverse order sequence and release the
3522          * mdi_pathinfo mutex.
3523          */
3524         i_mdi_client_lock(ct, pip);
3525         MDI_PI_UNLOCK(pip);
3526 
3527         /*
3528          * Wait till failover state is cleared
3529          */
3530         while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3531                 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3532 
3533         /*
3534          * Mark the mdi_pathinfo node state as transient
3535          */
3536         MDI_PI_LOCK(pip);
3537         switch (state) {
3538         case MDI_PATHINFO_STATE_ONLINE:
3539                 MDI_PI_SET_ONLINING(pip);
3540                 break;
3541 
3542         case MDI_PATHINFO_STATE_STANDBY:
3543                 MDI_PI_SET_STANDBYING(pip);
3544                 break;
3545 
3546         case MDI_PATHINFO_STATE_FAULT:
3547                 /*
3548                  * Mark the pathinfo state as FAULTED
3549                  */
3550                 MDI_PI_SET_FAULTING(pip);
3551                 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3552                 break;
3553 
3554         case MDI_PATHINFO_STATE_OFFLINE:
3555                 /*
3556                  * ndi_devi_offline() cannot hold pip or ct locks.
3557                  */
3558                 MDI_PI_UNLOCK(pip);
3559 
3560                 /*
3561                  * If this is a user initiated path online->offline operation
3562                  * who's success would transition a client from DEGRADED to
3563                  * FAILED then only proceed if we can offline the client first.
3564                  */
3565                 cdip = ct->ct_dip;
3566                 if ((flag & NDI_USER_REQ) &&
3567                     MDI_PI_IS_ONLINE(pip) &&
3568                     (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3569                         i_mdi_client_unlock(ct);
3570                         rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3571                         if (rv != NDI_SUCCESS) {
3572                                 /*
3573                                  * Convert to MDI error code
3574                                  */
3575                                 switch (rv) {
3576                                 case NDI_BUSY:
3577                                         rv = MDI_BUSY;
3578                                         break;
3579                                 default:
3580                                         rv = MDI_FAILURE;
3581                                         break;
3582                                 }
                                     /*
                                      * Neither the client lock nor the pip
                                      * lock is held here, so skip straight
                                      * to the pHCI cleanup.
                                      */
3583                                 goto state_change_exit;
3584                         } else {
                                     /* Retake the client lock dropped above. */
3585                                 i_mdi_client_lock(ct, NULL);
3586                         }
3587                 }
3588                 /*
3589                  * Mark the mdi_pathinfo node state as transient
3590                  */
3591                 MDI_PI_LOCK(pip);
3592                 MDI_PI_SET_OFFLINING(pip);
3593                 break;
3594         }
             /*
              * Every path out of the switch, including a state value that
              * matched no case, arrives here with the pip lock held.
              */
3595         MDI_PI_UNLOCK(pip);
3596         MDI_CLIENT_UNSTABLE(ct);
3597         i_mdi_client_unlock(ct);
3598 
             /*
              * Call the vHCI driver with no MDI locks held.  rv keeps its
              * previous value when no vo_pi_state_change entry point is
              * registered.
              */
3599         f = vh->vh_ops->vo_pi_state_change;
3600         if (f != NULL)
3601                 rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3602 
3603         MDI_CLIENT_LOCK(ct);
3604         MDI_PI_LOCK(pip);
3605         if (rv == MDI_NOT_SUPPORTED) {
3606                 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3607         }
3608         if (rv != MDI_SUCCESS) {
3609                 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3610                     "vo_pi_state_change failed: rv %x", rv));
3611         }
             /*
              * On success clear the transient marking; on failure put
              * pi_state back to the value given by MDI_PI_OLD_STATE().
              */
3612         if (MDI_PI_IS_TRANSIENT(pip)) {
3613                 if (rv == MDI_SUCCESS) {
3614                         MDI_PI_CLEAR_TRANSIENT(pip);
3615                 } else {
3616                         MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3617                 }
3618         }
3619 
3620         /*
3621          * Wake anyone waiting for this mdi_pathinfo node
3622          */
3623         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3624         MDI_PI_UNLOCK(pip);
3625 
3626         /*
3627          * Mark the client device as stable
3628          */
3629         MDI_CLIENT_STABLE(ct);
3630         if (rv == MDI_SUCCESS) {
3631                 if (ct->ct_unstable == 0) {
3632                         cdip = ct->ct_dip;
3633 
3634                         /*
3635                          * Onlining the mdi_pathinfo node will impact the
3636                          * client state Update the client and dev_info node
3637                          * state accordingly
3638                          */
                             /*
                              * From here rv carries NDI_* codes until it is
                              * converted back to an MDI_* code below.
                              */
3639                         rv = NDI_SUCCESS;
3640                         i_mdi_client_update_state(ct);
3641                         switch (MDI_CLIENT_STATE(ct)) {
3642                         case MDI_CLIENT_STATE_OPTIMAL:
3643                         case MDI_CLIENT_STATE_DEGRADED:
3644                                 if (cdip && !i_ddi_devi_attached(cdip) &&
3645                                     ((state == MDI_PATHINFO_STATE_ONLINE) ||
3646                                     (state == MDI_PATHINFO_STATE_STANDBY))) {
3647 
3648                                         /*
3649                                          * Must do ndi_devi_online() through
3650                                          * hotplug thread for deferred
3651                                          * attach mechanism to work
3652                                          */
3653                                         MDI_CLIENT_UNLOCK(ct);
3654                                         rv = ndi_devi_online(cdip, 0);
3655                                         MDI_CLIENT_LOCK(ct);
3656                                         if ((rv != NDI_SUCCESS) &&
3657                                             (MDI_CLIENT_STATE(ct) ==
3658                                             MDI_CLIENT_STATE_DEGRADED)) {
3659                                                 /*
3660                                                  * ndi_devi_online failed.
3661                                                  * Reset client flags to
3662                                                  * offline.
3663                                                  */
3664                                                 MDI_DEBUG(1, (MDI_WARN, cdip,
3665                                                     "!ndi_devi_online failed "
3666                                                     "error %x", rv));
3667                                                 MDI_CLIENT_SET_OFFLINE(ct);
3668                                         }
3669                                         if (rv != NDI_SUCCESS) {
3670                                                 /* Reset the path state */
3671                                                 MDI_PI_LOCK(pip);
3672                                                 MDI_PI(pip)->pi_state =
3673                                                     MDI_PI_OLD_STATE(pip);
3674                                                 MDI_PI_UNLOCK(pip);
3675                                         }
3676                                 }
3677                                 break;
3678 
3679                         case MDI_CLIENT_STATE_FAILED:
3680                                 /*
3681                                  * This is the last path case for
3682                                  * non-user initiated events.
3683                                  */
3684                                 if (((flag & NDI_USER_REQ) == 0) &&
3685                                     cdip && (i_ddi_node_state(cdip) >=
3686                                     DS_INITIALIZED)) {
3687                                         MDI_CLIENT_UNLOCK(ct);
3688                                         rv = ndi_devi_offline(cdip,
3689                                             NDI_DEVFS_CLEAN);
3690                                         MDI_CLIENT_LOCK(ct);
3691 
3692                                         if (rv != NDI_SUCCESS) {
3693                                                 /*
3694                                                  * ndi_devi_offline failed.
3695                                                  * Reset client flags to
3696                                                  * online as the path could not
3697                                                  * be offlined.
3698                                                  */
3699                                                 MDI_DEBUG(1, (MDI_WARN, cdip,
3700                                                     "!ndi_devi_offline failed: "
3701                                                     "error %x", rv));
3702                                                 MDI_CLIENT_SET_ONLINE(ct);
3703                                         }
3704                                 }
3705                                 break;
3706                         }
3707                         /*
3708                          * Convert to MDI error code
3709                          */
3710                         switch (rv) {
3711                         case NDI_SUCCESS:
3712                                 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3713                                 i_mdi_report_path_state(ct, pip);
3714                                 rv = MDI_SUCCESS;
3715                                 break;
3716                         case NDI_BUSY:
3717                                 rv = MDI_BUSY;
3718                                 break;
3719                         default:
3720                                 rv = MDI_FAILURE;
3721                                 break;
3722                         }
3723                 }
3724         }
3725         MDI_CLIENT_UNLOCK(ct);
3726 
3727 state_change_exit:
3728         /*
3729          * Mark the pHCI as stable again.
3730          */
3731         MDI_PHCI_LOCK(ph);
3732         MDI_PHCI_STABLE(ph);
3733         MDI_PHCI_UNLOCK(ph);
3734         return (rv);
3735 }
3736 
3737 /*
3738  * mdi_pi_online():
3739  *              Place the path_info node in the online state.  The path is
3740  *              now available to be selected by mdi_select_path() for
3741  *              transporting I/O requests to client devices.
3742  * Return Values:
3743  *              MDI_SUCCESS
3744  *              MDI_FAILURE
3745  */
3746 int
3747 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3748 {
3749         mdi_client_t    *ct = MDI_PI(pip)->pi_client;
3750         int             client_held = 0;
3751         int             rv;
3752 
3753         ASSERT(ct != NULL);
3754         rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3755         if (rv != MDI_SUCCESS)
3756                 return (rv);
3757 
3758         MDI_PI_LOCK(pip);
3759         if (MDI_PI(pip)->pi_pm_held == 0) {
3760                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3761                     "i_mdi_pm_hold_pip %p", (void *)pip));
3762                 i_mdi_pm_hold_pip(pip);
3763                 client_held = 1;
3764         }
3765         MDI_PI_UNLOCK(pip);
3766 
3767         if (client_held) {
3768                 MDI_CLIENT_LOCK(ct);
3769                 if (ct->ct_power_cnt == 0) {
3770                         rv = i_mdi_power_all_phci(ct);
3771                 }
3772 
3773                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3774                     "i_mdi_pm_hold_client %p", (void *)ct));
3775                 i_mdi_pm_hold_client(ct, 1);
3776                 MDI_CLIENT_UNLOCK(ct);
3777         }
3778 
3779         return (rv);
3780 }
3781 
3782 /*
3783  * mdi_pi_standby():
3784  *              Place the mdi_pathinfo node in standby state
3785  *
3786  * Return Values:
3787  *              MDI_SUCCESS
3788  *              MDI_FAILURE
3789  */
3790 int
3791 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3792 {
3793         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3794 }
3795 
3796 /*
3797  * mdi_pi_fault():
3798  *              Place the mdi_pathinfo node in fault'ed state
3799  * Return Values:
3800  *              MDI_SUCCESS
3801  *              MDI_FAILURE
3802  */
3803 int
3804 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3805 {
3806         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3807 }
3808 
3809 /*
3810  * mdi_pi_offline():
3811  *              Offline a mdi_pathinfo node.
3812  * Return Values:
3813  *              MDI_SUCCESS
3814  *              MDI_FAILURE
3815  */
3816 int
3817 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3818 {
3819         int     ret, client_held = 0;
3820         mdi_client_t    *ct;
3821 
3822         /*
3823          * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3824          * used it to mean "user initiated operation" (i.e. devctl). Callers
3825          * should now just use NDI_USER_REQ.
3826          */
3827         if (flags & NDI_DEVI_REMOVE) {
3828                 flags &= ~NDI_DEVI_REMOVE;
3829                 flags |= NDI_USER_REQ;
3830         }
3831 
3832         ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3833 
3834         if (ret == MDI_SUCCESS) {
3835                 MDI_PI_LOCK(pip);
3836                 if (MDI_PI(pip)->pi_pm_held) {
3837                         client_held = 1;
3838                 }
3839                 MDI_PI_UNLOCK(pip);
3840 
3841                 if (client_held) {
3842                         ct = MDI_PI(pip)->pi_client;
3843                         MDI_CLIENT_LOCK(ct);
3844                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3845                             "i_mdi_pm_rele_client\n"));
3846                         i_mdi_pm_rele_client(ct, 1);
3847                         MDI_CLIENT_UNLOCK(ct);
3848                 }
3849         }
3850 
3851         return (ret);
3852 }
3853 
3854 /*
3855  * i_mdi_pi_offline():
3856  *              Offline a mdi_pathinfo node and call the vHCI driver's callback
3857  */
3858 static int
3859 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3860 {
3861         dev_info_t      *vdip = NULL;
3862         mdi_vhci_t      *vh = NULL;
3863         mdi_client_t    *ct = NULL;
3864         int             (*f)();
3865         int             rv;
3866 
3867         MDI_PI_LOCK(pip);
3868         ct = MDI_PI(pip)->pi_client;
3869         ASSERT(ct != NULL);
3870 
3871         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3872                 /*
3873                  * Give a chance for pending I/Os to complete.
3874                  */
3875                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3876                     "!%d cmds still pending on path %s %p",
3877                     MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3878                     (void *)pip));
3879                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3880                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3881                     TR_CLOCK_TICK) == -1) {
3882                         /*
3883                          * The timeout time reached without ref_cnt being zero
3884                          * being signaled.
3885                          */
3886                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3887                             "!Timeout reached on path %s %p without the cond",
3888                             mdi_pi_spathname(pip), (void *)pip));
3889                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3890                             "!%d cmds still pending on path %s %p",
3891                             MDI_PI(pip)->pi_ref_cnt,
3892                             mdi_pi_spathname(pip), (void *)pip));
3893                 }
3894         }
3895         vh = ct->ct_vhci;
3896         vdip = vh->vh_dip;
3897 
3898         /*
3899          * Notify vHCI that has registered this event
3900          */
3901         ASSERT(vh->vh_ops);
3902         f = vh->vh_ops->vo_pi_state_change;
3903 
3904         if (f != NULL) {
3905                 MDI_PI_UNLOCK(pip);
3906                 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3907                     flags)) != MDI_SUCCESS) {
3908                         MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3909                             "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3910                             ddi_driver_name(vdip), ddi_get_instance(vdip),
3911                             (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3912                 }
3913                 MDI_PI_LOCK(pip);
3914         }
3915 
3916         /*
3917          * Set the mdi_pathinfo node state and clear the transient condition
3918          */
3919         MDI_PI_SET_OFFLINE(pip);
3920         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3921         MDI_PI_UNLOCK(pip);
3922 
3923         MDI_CLIENT_LOCK(ct);
3924         if (rv == MDI_SUCCESS) {
3925                 if (ct->ct_unstable == 0) {
3926                         dev_info_t      *cdip = ct->ct_dip;
3927 
3928                         /*
3929                          * Onlining the mdi_pathinfo node will impact the
3930                          * client state Update the client and dev_info node
3931                          * state accordingly
3932                          */
3933                         i_mdi_client_update_state(ct);
3934                         rv = NDI_SUCCESS;
3935                         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3936                                 if (cdip &&
3937                                     (i_ddi_node_state(cdip) >=
3938                                     DS_INITIALIZED)) {
3939                                         MDI_CLIENT_UNLOCK(ct);
3940                                         rv = ndi_devi_offline(cdip,
3941                                             NDI_DEVFS_CLEAN);
3942                                         MDI_CLIENT_LOCK(ct);
3943                                         if (rv != NDI_SUCCESS) {
3944                                                 /*
3945                                                  * ndi_devi_offline failed.
3946                                                  * Reset client flags to
3947                                                  * online.
3948                                                  */
3949                                                 MDI_DEBUG(4, (MDI_WARN, cdip,
3950                                                     "ndi_devi_offline failed: "
3951                                                     "error %x", rv));
3952                                                 MDI_CLIENT_SET_ONLINE(ct);
3953                                         }
3954                                 }
3955                         }
3956                         /*
3957                          * Convert to MDI error code
3958                          */
3959                         switch (rv) {
3960                         case NDI_SUCCESS:
3961                                 rv = MDI_SUCCESS;
3962                                 break;
3963                         case NDI_BUSY:
3964                                 rv = MDI_BUSY;
3965                                 break;
3966                         default:
3967                                 rv = MDI_FAILURE;
3968                                 break;
3969                         }
3970                 }
3971                 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3972                 i_mdi_report_path_state(ct, pip);
3973         }
3974 
3975         MDI_CLIENT_UNLOCK(ct);
3976 
3977         /*
3978          * Change in the mdi_pathinfo node state will impact the client state
3979          */
3980         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3981             "ct = %p pip = %p", (void *)ct, (void *)pip));
3982         return (rv);
3983 }
3984 
3985 /*
3986  * i_mdi_pi_online():
3987  *              Online a mdi_pathinfo node and call the vHCI driver's callback
3988  */
3989 static int
3990 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3991 {
3992         mdi_vhci_t      *vh = NULL;
3993         mdi_client_t    *ct = NULL;
3994         mdi_phci_t      *ph;
3995         int             (*f)();
3996         int             rv;
3997 
3998         MDI_PI_LOCK(pip);
3999         ph = MDI_PI(pip)->pi_phci;
4000         vh = ph->ph_vhci;
4001         ct = MDI_PI(pip)->pi_client;
4002         MDI_PI_SET_ONLINING(pip)
4003         MDI_PI_UNLOCK(pip);
4004         f = vh->vh_ops->vo_pi_state_change;
4005         if (f != NULL)
4006                 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
4007                     flags);
4008         MDI_CLIENT_LOCK(ct);
4009         MDI_PI_LOCK(pip);
4010         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
4011         MDI_PI_UNLOCK(pip);
4012         if (rv == MDI_SUCCESS) {
4013                 dev_info_t      *cdip = ct->ct_dip;
4014 
4015                 rv = MDI_SUCCESS;
4016                 i_mdi_client_update_state(ct);
4017                 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4018                     MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4019                         if (cdip && !i_ddi_devi_attached(cdip)) {
4020                                 MDI_CLIENT_UNLOCK(ct);
4021                                 rv = ndi_devi_online(cdip, 0);
4022                                 MDI_CLIENT_LOCK(ct);
4023                                 if ((rv != NDI_SUCCESS) &&
4024                                     (MDI_CLIENT_STATE(ct) ==
4025                                     MDI_CLIENT_STATE_DEGRADED)) {
4026                                         MDI_CLIENT_SET_OFFLINE(ct);
4027                                 }
4028                                 if (rv != NDI_SUCCESS) {
4029                                         /* Reset the path state */
4030                                         MDI_PI_LOCK(pip);
4031                                         MDI_PI(pip)->pi_state =
4032                                             MDI_PI_OLD_STATE(pip);
4033                                         MDI_PI_UNLOCK(pip);
4034                                 }
4035                         }
4036                 }
4037                 switch (rv) {
4038                 case NDI_SUCCESS:
4039                         MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4040                         i_mdi_report_path_state(ct, pip);
4041                         rv = MDI_SUCCESS;
4042                         break;
4043                 case NDI_BUSY:
4044                         rv = MDI_BUSY;
4045                         break;
4046                 default:
4047                         rv = MDI_FAILURE;
4048                         break;
4049                 }
4050         } else {
4051                 /* Reset the path state */
4052                 MDI_PI_LOCK(pip);
4053                 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4054                 MDI_PI_UNLOCK(pip);
4055         }
4056         MDI_CLIENT_UNLOCK(ct);
4057         return (rv);
4058 }
4059 
4060 /*
4061  * mdi_pi_get_node_name():
4062  *              Get the name associated with a mdi_pathinfo node.
4063  *              Since pathinfo nodes are not directly named, we
4064  *              return the node_name of the client.
4065  *
4066  * Return Values:
4067  *              char *
4068  */
4069 char *
4070 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4071 {
4072         mdi_client_t    *ct;
4073 
4074         if (pip == NULL)
4075                 return (NULL);
4076         ct = MDI_PI(pip)->pi_client;
4077         if ((ct == NULL) || (ct->ct_dip == NULL))
4078                 return (NULL);
4079         return (ddi_node_name(ct->ct_dip));
4080 }
4081 
4082 /*
4083  * mdi_pi_get_addr():
4084  *              Get the unit address associated with a mdi_pathinfo node
4085  *
4086  * Return Values:
4087  *              char *
4088  */
4089 char *
4090 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4091 {
4092         if (pip == NULL)
4093                 return (NULL);
4094 
4095         return (MDI_PI(pip)->pi_addr);
4096 }
4097 
4098 /*
4099  * mdi_pi_get_path_instance():
4100  *              Get the 'path_instance' of a mdi_pathinfo node
4101  *
4102  * Return Values:
4103  *              path_instance
4104  */
4105 int
4106 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4107 {
4108         if (pip == NULL)
4109                 return (0);
4110 
4111         return (MDI_PI(pip)->pi_path_instance);
4112 }
4113 
4114 /*
4115  * mdi_pi_pathname():
4116  *              Return pointer to path to pathinfo node.
4117  */
4118 char *
4119 mdi_pi_pathname(mdi_pathinfo_t *pip)
4120 {
4121         if (pip == NULL)
4122                 return (NULL);
4123         return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4124 }
4125 
4126 /*
4127  * mdi_pi_spathname():
4128  *              Return pointer to shortpath to pathinfo node. Used for debug
4129  *              messages, so return "" instead of NULL when unknown.
4130  */
4131 char *
4132 mdi_pi_spathname(mdi_pathinfo_t *pip)
4133 {
4134         char    *spath = "";
4135 
4136         if (pip) {
4137                 spath = mdi_pi_spathname_by_instance(
4138                     mdi_pi_get_path_instance(pip));
4139                 if (spath == NULL)
4140                         spath = "";
4141         }
4142         return (spath);
4143 }
4144 
4145 char *
4146 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4147 {
4148         char *obp_path = NULL;
4149         if ((pip == NULL) || (path == NULL))
4150                 return (NULL);
4151 
4152         if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4153                 (void) strcpy(path, obp_path);
4154                 (void) mdi_prop_free(obp_path);
4155         } else {
4156                 path = NULL;
4157         }
4158         return (path);
4159 }
4160 
4161 int
4162 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4163 {
4164         dev_info_t *pdip;
4165         char *obp_path = NULL;
4166         int rc = MDI_FAILURE;
4167 
4168         if (pip == NULL)
4169                 return (MDI_FAILURE);
4170 
4171         pdip = mdi_pi_get_phci(pip);
4172         if (pdip == NULL)
4173                 return (MDI_FAILURE);
4174 
4175         obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4176 
4177         if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4178                 (void) ddi_pathname(pdip, obp_path);
4179         }
4180 
4181         if (component) {
4182                 (void) strncat(obp_path, "/", MAXPATHLEN);
4183                 (void) strncat(obp_path, component, MAXPATHLEN);
4184         }
4185         rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4186 
4187         if (obp_path)
4188                 kmem_free(obp_path, MAXPATHLEN);
4189         return (rc);
4190 }
4191 
4192 /*
4193  * mdi_pi_get_client():
4194  *              Get the client devinfo associated with a mdi_pathinfo node
4195  *
4196  * Return Values:
4197  *              Handle to client device dev_info node
4198  */
4199 dev_info_t *
4200 mdi_pi_get_client(mdi_pathinfo_t *pip)
4201 {
4202         dev_info_t      *dip = NULL;
4203         if (pip) {
4204                 dip = MDI_PI(pip)->pi_client->ct_dip;
4205         }
4206         return (dip);
4207 }
4208 
4209 /*
4210  * mdi_pi_get_phci():
4211  *              Get the pHCI devinfo associated with the mdi_pathinfo node
4212  * Return Values:
4213  *              Handle to dev_info node
4214  */
4215 dev_info_t *
4216 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4217 {
4218         dev_info_t      *dip = NULL;
4219         mdi_phci_t      *ph;
4220 
4221         if (pip) {
4222                 ph = MDI_PI(pip)->pi_phci;
4223                 if (ph)
4224                         dip = ph->ph_dip;
4225         }
4226         return (dip);
4227 }
4228 
4229 /*
4230  * mdi_pi_get_client_private():
4231  *              Get the client private information associated with the
4232  *              mdi_pathinfo node
4233  */
4234 void *
4235 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4236 {
4237         void *cprivate = NULL;
4238         if (pip) {
4239                 cprivate = MDI_PI(pip)->pi_cprivate;
4240         }
4241         return (cprivate);
4242 }
4243 
4244 /*
4245  * mdi_pi_set_client_private():
4246  *              Set the client private information in the mdi_pathinfo node
4247  */
4248 void
4249 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4250 {
4251         if (pip) {
4252                 MDI_PI(pip)->pi_cprivate = priv;
4253         }
4254 }
4255 
4256 /*
4257  * mdi_pi_get_phci_private():
4258  *              Get the pHCI private information associated with the
4259  *              mdi_pathinfo node
4260  */
4261 caddr_t
4262 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4263 {
4264         caddr_t pprivate = NULL;
4265 
4266         if (pip) {
4267                 pprivate = MDI_PI(pip)->pi_pprivate;
4268         }
4269         return (pprivate);
4270 }
4271 
4272 /*
4273  * mdi_pi_set_phci_private():
4274  *              Set the pHCI private information in the mdi_pathinfo node
4275  */
4276 void
4277 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4278 {
4279         if (pip) {
4280                 MDI_PI(pip)->pi_pprivate = priv;
4281         }
4282 }
4283 
4284 /*
4285  * mdi_pi_get_state():
4286  *              Get the mdi_pathinfo node state. Transient states are internal
4287  *              and not provided to the users
4288  */
4289 mdi_pathinfo_state_t
4290 mdi_pi_get_state(mdi_pathinfo_t *pip)
4291 {
4292         mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4293 
4294         if (pip) {
4295                 if (MDI_PI_IS_TRANSIENT(pip)) {
4296                         /*
4297                          * mdi_pathinfo is in state transition.  Return the
4298                          * last good state.
4299                          */
4300                         state = MDI_PI_OLD_STATE(pip);
4301                 } else {
4302                         state = MDI_PI_STATE(pip);
4303                 }
4304         }
4305         return (state);
4306 }
4307 
4308 /*
4309  * mdi_pi_get_flags():
4310  *              Get the mdi_pathinfo node flags.
4311  */
4312 uint_t
4313 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4314 {
4315         return (pip ? MDI_PI(pip)->pi_flags : 0);
4316 }
4317 
4318 /*
4319  * Note that the following function needs to be the new interface for
4320  * mdi_pi_get_state when mpxio gets integrated to ON.
4321  */
4322 int
4323 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4324                 uint32_t *ext_state)
4325 {
4326         *state = MDI_PATHINFO_STATE_INIT;
4327 
4328         if (pip) {
4329                 if (MDI_PI_IS_TRANSIENT(pip)) {
4330                         /*
4331                          * mdi_pathinfo is in state transition.  Return the
4332                          * last good state.
4333                          */
4334                         *state = MDI_PI_OLD_STATE(pip);
4335                         *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4336                 } else {
4337                         *state = MDI_PI_STATE(pip);
4338                         *ext_state = MDI_PI_EXT_STATE(pip);
4339                 }
4340         }
4341         return (MDI_SUCCESS);
4342 }
4343 
4344 /*
4345  * mdi_pi_get_preferred:
4346  *      Get the preferred path flag
4347  */
4348 int
4349 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4350 {
4351         if (pip) {
4352                 return (MDI_PI(pip)->pi_preferred);
4353         }
4354         return (0);
4355 }
4356 
4357 /*
4358  * mdi_pi_set_preferred:
4359  *      Set the preferred path flag
4360  */
4361 void
4362 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4363 {
4364         if (pip) {
4365                 MDI_PI(pip)->pi_preferred = preferred;
4366         }
4367 }
4368 
4369 /*
4370  * mdi_pi_set_state():
4371  *              Set the mdi_pathinfo node state
4372  */
4373 void
4374 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4375 {
4376         uint32_t        ext_state;
4377 
4378         if (pip) {
4379                 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4380                 MDI_PI(pip)->pi_state = state;
4381                 MDI_PI(pip)->pi_state |= ext_state;
4382 
4383                 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4384                 i_ddi_di_cache_invalidate();
4385         }
4386 }
4387 
4388 /*
4389  * Property functions:
4390  */
4391 int
4392 i_map_nvlist_error_to_mdi(int val)
4393 {
4394         int rv;
4395 
4396         switch (val) {
4397         case 0:
4398                 rv = DDI_PROP_SUCCESS;
4399                 break;
4400         case EINVAL:
4401         case ENOTSUP:
4402                 rv = DDI_PROP_INVAL_ARG;
4403                 break;
4404         case ENOMEM:
4405                 rv = DDI_PROP_NO_MEMORY;
4406                 break;
4407         default:
4408                 rv = DDI_PROP_NOT_FOUND;
4409                 break;
4410         }
4411         return (rv);
4412 }
4413 
4414 /*
4415  * mdi_pi_get_next_prop():
4416  *              Property walk function.  The caller should hold mdi_pi_lock()
4417  *              and release by calling mdi_pi_unlock() at the end of walk to
4418  *              get a consistent value.
4419  */
4420 nvpair_t *
4421 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4422 {
4423         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4424                 return (NULL);
4425         }
4426         ASSERT(MDI_PI_LOCKED(pip));
4427         return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4428 }
4429 
4430 /*
4431  * mdi_prop_remove():
4432  *              Remove the named property from the named list.
4433  */
4434 int
4435 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4436 {
4437         if (pip == NULL) {
4438                 return (DDI_PROP_NOT_FOUND);
4439         }
4440         ASSERT(!MDI_PI_LOCKED(pip));
4441         MDI_PI_LOCK(pip);
4442         if (MDI_PI(pip)->pi_prop == NULL) {
4443                 MDI_PI_UNLOCK(pip);
4444                 return (DDI_PROP_NOT_FOUND);
4445         }
4446         if (name) {
4447                 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4448         } else {
4449                 char            nvp_name[MAXNAMELEN];
4450                 nvpair_t        *nvp;
4451                 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4452                 while (nvp) {
4453                         nvpair_t        *next;
4454                         next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4455                         (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4456                             nvpair_name(nvp));
4457                         (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4458                             nvp_name);
4459                         nvp = next;
4460                 }
4461         }
4462         MDI_PI_UNLOCK(pip);
4463         return (DDI_PROP_SUCCESS);
4464 }
4465 
4466 /*
4467  * mdi_prop_size():
4468  *              Get buffer size needed to pack the property data.
4469  *              Caller should hold the mdi_pathinfo_t lock to get a consistent
4470  *              buffer size.
4471  */
4472 int
4473 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4474 {
4475         int     rv;
4476         size_t  bufsize;
4477 
4478         *buflenp = 0;
4479         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4480                 return (DDI_PROP_NOT_FOUND);
4481         }
4482         ASSERT(MDI_PI_LOCKED(pip));
4483         rv = nvlist_size(MDI_PI(pip)->pi_prop,
4484             &bufsize, NV_ENCODE_NATIVE);
4485         *buflenp = bufsize;
4486         return (i_map_nvlist_error_to_mdi(rv));
4487 }
4488 
4489 /*
4490  * mdi_prop_pack():
4491  *              pack the property list.  The caller should hold the
4492  *              mdi_pathinfo_t node to get a consistent data
4493  */
4494 int
4495 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4496 {
4497         int     rv;
4498         size_t  bufsize;
4499 
4500         if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4501                 return (DDI_PROP_NOT_FOUND);
4502         }
4503 
4504         ASSERT(MDI_PI_LOCKED(pip));
4505 
4506         bufsize = buflen;
4507         rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4508             NV_ENCODE_NATIVE, KM_SLEEP);
4509 
4510         return (i_map_nvlist_error_to_mdi(rv));
4511 }
4512 
4513 /*
4514  * mdi_prop_update_byte():
4515  *              Create/Update a byte property
4516  */
4517 int
4518 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4519 {
4520         int rv;
4521 
4522         if (pip == NULL) {
4523                 return (DDI_PROP_INVAL_ARG);
4524         }
4525         ASSERT(!MDI_PI_LOCKED(pip));
4526         MDI_PI_LOCK(pip);
4527         if (MDI_PI(pip)->pi_prop == NULL) {
4528                 MDI_PI_UNLOCK(pip);
4529                 return (DDI_PROP_NOT_FOUND);
4530         }
4531         rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4532         MDI_PI_UNLOCK(pip);
4533         return (i_map_nvlist_error_to_mdi(rv));
4534 }
4535 
4536 /*
4537  * mdi_prop_update_byte_array():
4538  *              Create/Update a byte array property
4539  */
4540 int
4541 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4542     uint_t nelements)
4543 {
4544         int rv;
4545 
4546         if (pip == NULL) {
4547                 return (DDI_PROP_INVAL_ARG);
4548         }
4549         ASSERT(!MDI_PI_LOCKED(pip));
4550         MDI_PI_LOCK(pip);
4551         if (MDI_PI(pip)->pi_prop == NULL) {
4552                 MDI_PI_UNLOCK(pip);
4553                 return (DDI_PROP_NOT_FOUND);
4554         }
4555         rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4556         MDI_PI_UNLOCK(pip);
4557         return (i_map_nvlist_error_to_mdi(rv));
4558 }
4559 
4560 /*
4561  * mdi_prop_update_int():
4562  *              Create/Update a 32 bit integer property
4563  */
4564 int
4565 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4566 {
4567         int rv;
4568 
4569         if (pip == NULL) {
4570                 return (DDI_PROP_INVAL_ARG);
4571         }
4572         ASSERT(!MDI_PI_LOCKED(pip));
4573         MDI_PI_LOCK(pip);
4574         if (MDI_PI(pip)->pi_prop == NULL) {
4575                 MDI_PI_UNLOCK(pip);
4576                 return (DDI_PROP_NOT_FOUND);
4577         }
4578         rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4579         MDI_PI_UNLOCK(pip);
4580         return (i_map_nvlist_error_to_mdi(rv));
4581 }
4582 
4583 /*
4584  * mdi_prop_update_int64():
4585  *              Create/Update a 64 bit integer property
4586  */
4587 int
4588 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4589 {
4590         int rv;
4591 
4592         if (pip == NULL) {
4593                 return (DDI_PROP_INVAL_ARG);
4594         }
4595         ASSERT(!MDI_PI_LOCKED(pip));
4596         MDI_PI_LOCK(pip);
4597         if (MDI_PI(pip)->pi_prop == NULL) {
4598                 MDI_PI_UNLOCK(pip);
4599                 return (DDI_PROP_NOT_FOUND);
4600         }
4601         rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4602         MDI_PI_UNLOCK(pip);
4603         return (i_map_nvlist_error_to_mdi(rv));
4604 }
4605 
4606 /*
4607  * mdi_prop_update_int_array():
4608  *              Create/Update a int array property
4609  */
4610 int
4611 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4612             uint_t nelements)
4613 {
4614         int rv;
4615 
4616         if (pip == NULL) {
4617                 return (DDI_PROP_INVAL_ARG);
4618         }
4619         ASSERT(!MDI_PI_LOCKED(pip));
4620         MDI_PI_LOCK(pip);
4621         if (MDI_PI(pip)->pi_prop == NULL) {
4622                 MDI_PI_UNLOCK(pip);
4623                 return (DDI_PROP_NOT_FOUND);
4624         }
4625         rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4626             nelements);
4627         MDI_PI_UNLOCK(pip);
4628         return (i_map_nvlist_error_to_mdi(rv));
4629 }
4630 
4631 /*
4632  * mdi_prop_update_string():
4633  *              Create/Update a string property
4634  */
4635 int
4636 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4637 {
4638         int rv;
4639 
4640         if (pip == NULL) {
4641                 return (DDI_PROP_INVAL_ARG);
4642         }
4643         ASSERT(!MDI_PI_LOCKED(pip));
4644         MDI_PI_LOCK(pip);
4645         if (MDI_PI(pip)->pi_prop == NULL) {
4646                 MDI_PI_UNLOCK(pip);
4647                 return (DDI_PROP_NOT_FOUND);
4648         }
4649         rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4650         MDI_PI_UNLOCK(pip);
4651         return (i_map_nvlist_error_to_mdi(rv));
4652 }
4653 
4654 /*
4655  * mdi_prop_update_string_array():
4656  *              Create/Update a string array property
4657  */
4658 int
4659 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4660     uint_t nelements)
4661 {
4662         int rv;
4663 
4664         if (pip == NULL) {
4665                 return (DDI_PROP_INVAL_ARG);
4666         }
4667         ASSERT(!MDI_PI_LOCKED(pip));
4668         MDI_PI_LOCK(pip);
4669         if (MDI_PI(pip)->pi_prop == NULL) {
4670                 MDI_PI_UNLOCK(pip);
4671                 return (DDI_PROP_NOT_FOUND);
4672         }
4673         rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4674             nelements);
4675         MDI_PI_UNLOCK(pip);
4676         return (i_map_nvlist_error_to_mdi(rv));
4677 }
4678 
4679 /*
4680  * mdi_prop_lookup_byte():
4681  *              Look for byte property identified by name.  The data returned
4682  *              is the actual property and valid as long as mdi_pathinfo_t node
4683  *              is alive.
4684  */
4685 int
4686 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4687 {
4688         int rv;
4689 
4690         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4691                 return (DDI_PROP_NOT_FOUND);
4692         }
4693         rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4694         return (i_map_nvlist_error_to_mdi(rv));
4695 }
4696 
4697 
4698 /*
4699  * mdi_prop_lookup_byte_array():
4700  *              Look for byte array property identified by name.  The data
4701  *              returned is the actual property and valid as long as
4702  *              mdi_pathinfo_t node is alive.
4703  */
4704 int
4705 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4706     uint_t *nelements)
4707 {
4708         int rv;
4709 
4710         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4711                 return (DDI_PROP_NOT_FOUND);
4712         }
4713         rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4714             nelements);
4715         return (i_map_nvlist_error_to_mdi(rv));
4716 }
4717 
4718 /*
4719  * mdi_prop_lookup_int():
4720  *              Look for int property identified by name.  The data returned
4721  *              is the actual property and valid as long as mdi_pathinfo_t
4722  *              node is alive.
4723  */
4724 int
4725 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4726 {
4727         int rv;
4728 
4729         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4730                 return (DDI_PROP_NOT_FOUND);
4731         }
4732         rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4733         return (i_map_nvlist_error_to_mdi(rv));
4734 }
4735 
4736 /*
4737  * mdi_prop_lookup_int64():
4738  *              Look for int64 property identified by name.  The data returned
4739  *              is the actual property and valid as long as mdi_pathinfo_t node
4740  *              is alive.
4741  */
4742 int
4743 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4744 {
4745         int rv;
4746         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4747                 return (DDI_PROP_NOT_FOUND);
4748         }
4749         rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4750         return (i_map_nvlist_error_to_mdi(rv));
4751 }
4752 
4753 /*
4754  * mdi_prop_lookup_int_array():
4755  *              Look for int array property identified by name.  The data
4756  *              returned is the actual property and valid as long as
4757  *              mdi_pathinfo_t node is alive.
4758  */
4759 int
4760 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4761     uint_t *nelements)
4762 {
4763         int rv;
4764 
4765         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4766                 return (DDI_PROP_NOT_FOUND);
4767         }
4768         rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4769             (int32_t **)data, nelements);
4770         return (i_map_nvlist_error_to_mdi(rv));
4771 }
4772 
4773 /*
4774  * mdi_prop_lookup_string():
4775  *              Look for string property identified by name.  The data
4776  *              returned is the actual property and valid as long as
4777  *              mdi_pathinfo_t node is alive.
4778  */
4779 int
4780 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4781 {
4782         int rv;
4783 
4784         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4785                 return (DDI_PROP_NOT_FOUND);
4786         }
4787         rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4788         return (i_map_nvlist_error_to_mdi(rv));
4789 }
4790 
4791 /*
4792  * mdi_prop_lookup_string_array():
4793  *              Look for string array property identified by name.  The data
4794  *              returned is the actual property and valid as long as
4795  *              mdi_pathinfo_t node is alive.
4796  */
4797 int
4798 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4799     uint_t *nelements)
4800 {
4801         int rv;
4802 
4803         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4804                 return (DDI_PROP_NOT_FOUND);
4805         }
4806         rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4807             nelements);
4808         return (i_map_nvlist_error_to_mdi(rv));
4809 }
4810 
4811 /*
4812  * mdi_prop_free():
4813  *              Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4814  *              functions return the pointer to actual property data and not a
4815  *              copy of it.  So the data returned is valid as long as
4816  *              mdi_pathinfo_t node is valid.
4817  */
4818 /*ARGSUSED*/
4819 int
4820 mdi_prop_free(void *data)
4821 {
4822         return (DDI_PROP_SUCCESS);
4823 }
4824 
4825 /*ARGSUSED*/
4826 static void
4827 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4828 {
4829         char            *ct_path;
4830         char            *ct_status;
4831         char            *status;
4832         dev_info_t      *cdip = ct->ct_dip;
4833         char            lb_buf[64];
4834         int             report_lb_c = 0, report_lb_p = 0;
4835 
4836         ASSERT(MDI_CLIENT_LOCKED(ct));
4837         if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4838             (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4839                 return;
4840         }
4841         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4842                 ct_status = "optimal";
4843                 report_lb_c = 1;
4844         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4845                 ct_status = "degraded";
4846         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4847                 ct_status = "failed";
4848         } else {
4849                 ct_status = "unknown";
4850         }
4851 
4852         lb_buf[0] = 0;          /* not interested in load balancing config */
4853 
4854         if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4855                 status = "removed";
4856         } else if (MDI_PI_IS_OFFLINE(pip)) {
4857                 status = "offline";
4858         } else if (MDI_PI_IS_ONLINE(pip)) {
4859                 status = "online";
4860                 report_lb_p = 1;
4861         } else if (MDI_PI_IS_STANDBY(pip)) {
4862                 status = "standby";
4863         } else if (MDI_PI_IS_FAULT(pip)) {
4864                 status = "faulted";
4865         } else {
4866                 status = "unknown";
4867         }
4868 
4869         if (cdip) {
4870                 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4871 
4872                 /*
4873                  * NOTE: Keeping "multipath status: %s" and
4874                  * "Load balancing: %s" format unchanged in case someone
4875                  * scrubs /var/adm/messages looking for these messages.
4876                  */
4877                 if (report_lb_c && report_lb_p) {
4878                         if (ct->ct_lb == LOAD_BALANCE_LBA) {
4879                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4880                                     "%s, region-size: %d", mdi_load_balance_lba,
4881                                     ct->ct_lb_args->region_size);
4882                         } else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4883                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4884                                     "%s", mdi_load_balance_none);
4885                         } else {
4886                                 (void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4887                                     mdi_load_balance_rr);
4888                         }
4889 
4890                         cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4891                             "?%s (%s%d) multipath status: %s: "
4892                             "path %d %s is %s: Load balancing: %s\n",
4893                             ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4894                             ddi_get_instance(cdip), ct_status,
4895                             mdi_pi_get_path_instance(pip),
4896                             mdi_pi_spathname(pip), status, lb_buf);
4897                 } else {
4898                         cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4899                             "?%s (%s%d) multipath status: %s: "
4900                             "path %d %s is %s\n",
4901                             ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4902                             ddi_get_instance(cdip), ct_status,
4903                             mdi_pi_get_path_instance(pip),
4904                             mdi_pi_spathname(pip), status);
4905                 }
4906 
4907                 kmem_free(ct_path, MAXPATHLEN);
4908                 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4909         }
4910 }
4911 
4912 #ifdef  DEBUG
4913 /*
4914  * i_mdi_log():
4915  *              Utility function for error message management
4916  *
4917  *              NOTE: Implementation takes care of trailing \n for cmn_err,
4918  *              MDI_DEBUG should not terminate fmt strings with \n.
4919  *
4920  *              NOTE: If the level is >= 2, and there is no leading !?^
4921  *              then a leading ! is implied (but can be overriden via
4922  *              mdi_debug_consoleonly). If you are using kmdb on the console,
4923  *              consider setting mdi_debug_consoleonly to 1 as an aid.
4924  */
4925 /*PRINTFLIKE4*/
4926 static void
4927 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4928 {
4929         char            name[MAXNAMELEN];
4930         char            buf[512];
4931         char            *bp;
4932         va_list         ap;
4933         int             log_only = 0;
4934         int             boot_only = 0;
4935         int             console_only = 0;
4936 
4937         if (dip) {
4938                 (void) snprintf(name, sizeof(name), "%s%d: ",
4939                     ddi_driver_name(dip), ddi_get_instance(dip));
4940         } else {
4941                 name[0] = 0;
4942         }
4943 
4944         va_start(ap, fmt);
4945         (void) vsnprintf(buf, sizeof(buf), fmt, ap);
4946         va_end(ap);
4947 
4948         switch (buf[0]) {
4949         case '!':
4950                 bp = &buf[1];
4951                 log_only = 1;
4952                 break;
4953         case '?':
4954                 bp = &buf[1];
4955                 boot_only = 1;
4956                 break;
4957         case '^':
4958                 bp = &buf[1];
4959                 console_only = 1;
4960                 break;
4961         default:
4962                 if (level >= 2)
4963                         log_only = 1;           /* ! implied */
4964                 bp = buf;
4965                 break;
4966         }
4967         if (mdi_debug_logonly) {
4968                 log_only = 1;
4969                 boot_only = 0;
4970                 console_only = 0;
4971         }
4972         if (mdi_debug_consoleonly) {
4973                 log_only = 0;
4974                 boot_only = 0;
4975                 console_only = 1;
4976                 level = CE_NOTE;
4977                 goto console;
4978         }
4979 
4980         switch (level) {
4981         case CE_NOTE:
4982                 level = CE_CONT;
4983                 /* FALLTHROUGH */
4984         case CE_CONT:
4985                 if (boot_only) {
4986                         cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4987                 } else if (console_only) {
4988                         cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4989                 } else if (log_only) {
4990                         cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4991                 } else {
4992                         cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4993                 }
4994                 break;
4995 
4996         case CE_WARN:
4997         case CE_PANIC:
4998         console:
4999                 if (boot_only) {
5000                         cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
5001                 } else if (console_only) {
5002                         cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
5003                 } else if (log_only) {
5004                         cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
5005                 } else {
5006                         cmn_err(level, "mdi: %s%s: %s", name, func, bp);
5007                 }
5008                 break;
5009         default:
5010                 cmn_err(level, "mdi: %s%s", name, bp);
5011                 break;
5012         }
5013 }
5014 #endif  /* DEBUG */
5015 
5016 void
5017 i_mdi_client_online(dev_info_t *ct_dip)
5018 {
5019         mdi_client_t    *ct;
5020 
5021         /*
5022          * Client online notification. Mark client state as online
5023          * restore our binding with dev_info node
5024          */
5025         ct = i_devi_get_client(ct_dip);
5026         ASSERT(ct != NULL);
5027         MDI_CLIENT_LOCK(ct);
5028         MDI_CLIENT_SET_ONLINE(ct);
5029         /* catch for any memory leaks */
5030         ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5031         ct->ct_dip = ct_dip;
5032 
5033         if (ct->ct_power_cnt == 0)
5034                 (void) i_mdi_power_all_phci(ct);
5035 
5036         MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5037             "i_mdi_pm_hold_client %p", (void *)ct));
5038         i_mdi_pm_hold_client(ct, 1);
5039 
5040         MDI_CLIENT_UNLOCK(ct);
5041 }
5042 
5043 void
5044 i_mdi_phci_online(dev_info_t *ph_dip)
5045 {
5046         mdi_phci_t      *ph;
5047 
5048         /* pHCI online notification. Mark state accordingly */
5049         ph = i_devi_get_phci(ph_dip);
5050         ASSERT(ph != NULL);
5051         MDI_PHCI_LOCK(ph);
5052         MDI_PHCI_SET_ONLINE(ph);
5053         MDI_PHCI_UNLOCK(ph);
5054 }
5055 
5056 /*
5057  * mdi_devi_online():
5058  *              Online notification from NDI framework on pHCI/client
5059  *              device online.
5060  * Return Values:
5061  *              NDI_SUCCESS
5062  *              MDI_FAILURE
5063  */
5064 /*ARGSUSED*/
5065 int
5066 mdi_devi_online(dev_info_t *dip, uint_t flags)
5067 {
5068         if (MDI_PHCI(dip)) {
5069                 i_mdi_phci_online(dip);
5070         }
5071 
5072         if (MDI_CLIENT(dip)) {
5073                 i_mdi_client_online(dip);
5074         }
5075         return (NDI_SUCCESS);
5076 }
5077 
5078 /*
5079  * mdi_devi_offline():
5080  *              Offline notification from NDI framework on pHCI/Client device
5081  *              offline.
5082  *
5083  * Return Values:
5084  *              NDI_SUCCESS
5085  *              NDI_FAILURE
5086  */
5087 /*ARGSUSED*/
5088 int
5089 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5090 {
5091         int             rv = NDI_SUCCESS;
5092 
5093         if (MDI_CLIENT(dip)) {
5094                 rv = i_mdi_client_offline(dip, flags);
5095                 if (rv != NDI_SUCCESS)
5096                         return (rv);
5097         }
5098 
5099         if (MDI_PHCI(dip)) {
5100                 rv = i_mdi_phci_offline(dip, flags);
5101 
5102                 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5103                         /* set client back online */
5104                         i_mdi_client_online(dip);
5105                 }
5106         }
5107 
5108         return (rv);
5109 }
5110 
5111 /*ARGSUSED*/
5112 static int
5113 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
5114 {
5115         int             rv = NDI_SUCCESS;
5116         mdi_phci_t      *ph;
5117         mdi_client_t    *ct;
5118         mdi_pathinfo_t  *pip;
5119         mdi_pathinfo_t  *next;
5120         mdi_pathinfo_t  *failed_pip = NULL;
5121         dev_info_t      *cdip;
5122 
5123         /*
5124          * pHCI component offline notification
5125          * Make sure that this pHCI instance is free to be offlined.
5126          * If it is OK to proceed, Offline and remove all the child
5127          * mdi_pathinfo nodes.  This process automatically offlines
5128          * corresponding client devices, for which this pHCI provides
5129          * critical services.
5130          */
5131         ph = i_devi_get_phci(dip);
5132         MDI_DEBUG(2, (MDI_NOTE, dip,
5133             "called %p %p", (void *)dip, (void *)ph));
5134         if (ph == NULL) {
5135                 return (rv);
5136         }
5137 
5138         MDI_PHCI_LOCK(ph);
5139 
5140         if (MDI_PHCI_IS_OFFLINE(ph)) {
5141                 MDI_DEBUG(1, (MDI_WARN, dip,
5142                     "!pHCI already offlined: %p", (void *)dip));
5143                 MDI_PHCI_UNLOCK(ph);
5144                 return (NDI_SUCCESS);
5145         }
5146 
5147         /*
5148          * Check to see if the pHCI can be offlined
5149          */
5150         if (ph->ph_unstable) {
5151                 MDI_DEBUG(1, (MDI_WARN, dip,
5152                     "!One or more target devices are in transient state. "
5153                     "This device can not be removed at this moment. "
5154                     "Please try again later."));
5155                 MDI_PHCI_UNLOCK(ph);
5156                 return (NDI_BUSY);
5157         }
5158 
5159         pip = ph->ph_path_head;
5160         while (pip != NULL) {
5161                 MDI_PI_LOCK(pip);
5162                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5163 
5164                 /*
5165                  * The mdi_pathinfo state is OK. Check the client state.
5166                  * If failover in progress fail the pHCI from offlining
5167                  */
5168                 ct = MDI_PI(pip)->pi_client;
5169                 i_mdi_client_lock(ct, pip);
5170                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5171                     (ct->ct_unstable)) {
5172                         /*
5173                          * Failover is in progress, Fail the DR
5174                          */
5175                         MDI_DEBUG(1, (MDI_WARN, dip,
5176                             "!pHCI device is busy. "
5177                             "This device can not be removed at this moment. "
5178                             "Please try again later."));
5179                         MDI_PI_UNLOCK(pip);
5180                         i_mdi_client_unlock(ct);
5181                         MDI_PHCI_UNLOCK(ph);
5182                         return (NDI_BUSY);
5183                 }
5184                 MDI_PI_UNLOCK(pip);
5185 
5186                 /*
5187                  * Check to see of we are removing the last path of this
5188                  * client device...
5189                  */
5190                 cdip = ct->ct_dip;
5191                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5192                     (i_mdi_client_compute_state(ct, ph) ==
5193                     MDI_CLIENT_STATE_FAILED)) {
5194                         i_mdi_client_unlock(ct);
5195                         MDI_PHCI_UNLOCK(ph);
5196                         if (ndi_devi_offline(cdip,
5197                             NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5198                                 /*
5199                                  * ndi_devi_offline() failed.
5200                                  * This pHCI provides the critical path
5201                                  * to one or more client devices.
5202                                  * Return busy.
5203                                  */
5204                                 MDI_PHCI_LOCK(ph);
5205                                 MDI_DEBUG(1, (MDI_WARN, dip,
5206                                     "!pHCI device is busy. "
5207                                     "This device can not be removed at this "
5208                                     "moment. Please try again later."));
5209                                 failed_pip = pip;
5210                                 break;
5211                         } else {
5212                                 MDI_PHCI_LOCK(ph);
5213                                 pip = next;
5214                         }
5215                 } else {
5216                         i_mdi_client_unlock(ct);
5217                         pip = next;
5218                 }
5219         }
5220 
5221         if (failed_pip) {
5222                 pip = ph->ph_path_head;
5223                 while (pip != failed_pip) {
5224                         MDI_PI_LOCK(pip);
5225                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5226                         ct = MDI_PI(pip)->pi_client;
5227                         i_mdi_client_lock(ct, pip);
5228                         cdip = ct->ct_dip;
5229                         switch (MDI_CLIENT_STATE(ct)) {
5230                         case MDI_CLIENT_STATE_OPTIMAL:
5231                         case MDI_CLIENT_STATE_DEGRADED:
5232                                 if (cdip) {
5233                                         MDI_PI_UNLOCK(pip);
5234                                         i_mdi_client_unlock(ct);
5235                                         MDI_PHCI_UNLOCK(ph);
5236                                         (void) ndi_devi_online(cdip, 0);
5237                                         MDI_PHCI_LOCK(ph);
5238                                         pip = next;
5239                                         continue;
5240                                 }
5241                                 break;
5242 
5243                         case MDI_CLIENT_STATE_FAILED:
5244                                 if (cdip) {
5245                                         MDI_PI_UNLOCK(pip);
5246                                         i_mdi_client_unlock(ct);
5247                                         MDI_PHCI_UNLOCK(ph);
5248                                         (void) ndi_devi_offline(cdip,
5249                                                 NDI_DEVFS_CLEAN);
5250                                         MDI_PHCI_LOCK(ph);
5251                                         pip = next;
5252                                         continue;
5253                                 }
5254                                 break;
5255                         }
5256                         MDI_PI_UNLOCK(pip);
5257                         i_mdi_client_unlock(ct);
5258                         pip = next;
5259                 }
5260                 MDI_PHCI_UNLOCK(ph);
5261                 return (NDI_BUSY);
5262         }
5263 
5264         /*
5265          * Mark the pHCI as offline
5266          */
5267         MDI_PHCI_SET_OFFLINE(ph);
5268 
5269         /*
5270          * Mark the child mdi_pathinfo nodes as transient
5271          */
5272         pip = ph->ph_path_head;
5273         while (pip != NULL) {
5274                 MDI_PI_LOCK(pip);
5275                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5276                 MDI_PI_SET_OFFLINING(pip);
5277                 MDI_PI_UNLOCK(pip);
5278                 pip = next;
5279         }
5280         MDI_PHCI_UNLOCK(ph);
5281         /*
5282          * Give a chance for any pending commands to execute
5283          */
5284         delay_random(mdi_delay);
5285         MDI_PHCI_LOCK(ph);
5286         pip = ph->ph_path_head;
5287         while (pip != NULL) {
5288                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5289                 (void) i_mdi_pi_offline(pip, flags);
5290                 MDI_PI_LOCK(pip);
5291                 ct = MDI_PI(pip)->pi_client;
5292                 if (!MDI_PI_IS_OFFLINE(pip)) {
5293                         MDI_DEBUG(1, (MDI_WARN, dip,
5294                             "!pHCI device is busy. "
5295                             "This device can not be removed at this moment. "
5296                             "Please try again later."));
5297                         MDI_PI_UNLOCK(pip);
5298                         MDI_PHCI_SET_ONLINE(ph);
5299                         MDI_PHCI_UNLOCK(ph);
5300                         return (NDI_BUSY);
5301                 }
5302                 MDI_PI_UNLOCK(pip);
5303                 pip = next;
5304         }
5305         MDI_PHCI_UNLOCK(ph);
5306 
5307         return (rv);
5308 }
5309 
5310 void
5311 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5312 {
5313         mdi_phci_t      *ph;
5314         mdi_client_t    *ct;
5315         mdi_pathinfo_t  *pip;
5316         mdi_pathinfo_t  *next;
5317         dev_info_t      *cdip;
5318 
5319         if (!MDI_PHCI(dip))
5320                 return;
5321 
5322         ph = i_devi_get_phci(dip);
5323         if (ph == NULL) {
5324                 return;
5325         }
5326 
5327         MDI_PHCI_LOCK(ph);
5328 
5329         if (MDI_PHCI_IS_OFFLINE(ph)) {
5330                 /* has no last path */
5331                 MDI_PHCI_UNLOCK(ph);
5332                 return;
5333         }
5334 
5335         pip = ph->ph_path_head;
5336         while (pip != NULL) {
5337                 MDI_PI_LOCK(pip);
5338                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5339 
5340                 ct = MDI_PI(pip)->pi_client;
5341                 i_mdi_client_lock(ct, pip);
5342                 MDI_PI_UNLOCK(pip);
5343 
5344                 cdip = ct->ct_dip;
5345                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5346                     (i_mdi_client_compute_state(ct, ph) ==
5347                     MDI_CLIENT_STATE_FAILED)) {
5348                         /* Last path. Mark client dip as retiring */
5349                         i_mdi_client_unlock(ct);
5350                         MDI_PHCI_UNLOCK(ph);
5351                         (void) e_ddi_mark_retiring(cdip, cons_array);
5352                         MDI_PHCI_LOCK(ph);
5353                         pip = next;
5354                 } else {
5355                         i_mdi_client_unlock(ct);
5356                         pip = next;
5357                 }
5358         }
5359 
5360         MDI_PHCI_UNLOCK(ph);
5361 
5362         return;
5363 }
5364 
5365 void
5366 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5367 {
5368         mdi_phci_t      *ph;
5369         mdi_client_t    *ct;
5370         mdi_pathinfo_t  *pip;
5371         mdi_pathinfo_t  *next;
5372         dev_info_t      *cdip;
5373 
5374         if (!MDI_PHCI(dip))
5375                 return;
5376 
5377         ph = i_devi_get_phci(dip);
5378         if (ph == NULL)
5379                 return;
5380 
5381         MDI_PHCI_LOCK(ph);
5382 
5383         if (MDI_PHCI_IS_OFFLINE(ph)) {
5384                 MDI_PHCI_UNLOCK(ph);
5385                 /* not last path */
5386                 return;
5387         }
5388 
5389         if (ph->ph_unstable) {
5390                 MDI_PHCI_UNLOCK(ph);
5391                 /* can't check for constraints */
5392                 *constraint = 0;
5393                 return;
5394         }
5395 
5396         pip = ph->ph_path_head;
5397         while (pip != NULL) {
5398                 MDI_PI_LOCK(pip);
5399                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5400 
5401                 /*
5402                  * The mdi_pathinfo state is OK. Check the client state.
5403                  * If failover in progress fail the pHCI from offlining
5404                  */
5405                 ct = MDI_PI(pip)->pi_client;
5406                 i_mdi_client_lock(ct, pip);
5407                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5408                     (ct->ct_unstable)) {
5409                         /*
5410                          * Failover is in progress, can't check for constraints
5411                          */
5412                         MDI_PI_UNLOCK(pip);
5413                         i_mdi_client_unlock(ct);
5414                         MDI_PHCI_UNLOCK(ph);
5415                         *constraint = 0;
5416                         return;
5417                 }
5418                 MDI_PI_UNLOCK(pip);
5419 
5420                 /*
5421                  * Check to see of we are retiring the last path of this
5422                  * client device...
5423                  */
5424                 cdip = ct->ct_dip;
5425                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5426                     (i_mdi_client_compute_state(ct, ph) ==
5427                     MDI_CLIENT_STATE_FAILED)) {
5428                         i_mdi_client_unlock(ct);
5429                         MDI_PHCI_UNLOCK(ph);
5430                         (void) e_ddi_retire_notify(cdip, constraint);
5431                         MDI_PHCI_LOCK(ph);
5432                         pip = next;
5433                 } else {
5434                         i_mdi_client_unlock(ct);
5435                         pip = next;
5436                 }
5437         }
5438 
5439         MDI_PHCI_UNLOCK(ph);
5440 
5441         return;
5442 }
5443 
5444 /*
5445  * offline the path(s) hanging off the pHCI. If the
5446  * last path to any client, check that constraints
5447  * have been applied.
5448  *
5449  * If constraint is 0, we aren't going to retire the 
5450  * pHCI. However we still need to go through the paths
5451  * calling e_ddi_retire_finalize() to clear their
5452  * contract barriers.
5453  */
5454 void
5455 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5456 {
5457         mdi_phci_t      *ph;
5458         mdi_client_t    *ct;
5459         mdi_pathinfo_t  *pip;
5460         mdi_pathinfo_t  *next;
5461         dev_info_t      *cdip;
5462         int             unstable = 0;
5463         int             tmp_constraint;
5464 
5465         if (!MDI_PHCI(dip))
5466                 return;
5467 
5468         ph = i_devi_get_phci(dip);
5469         if (ph == NULL) {
5470                 /* no last path and no pips */
5471                 return;
5472         }
5473 
5474         MDI_PHCI_LOCK(ph);
5475 
5476         if (MDI_PHCI_IS_OFFLINE(ph)) {
5477                 MDI_PHCI_UNLOCK(ph);
5478                 /* no last path and no pips */
5479                 return;
5480         }
5481 
5482         /*
5483          * Check to see if the pHCI can be offlined
5484          */
5485         if (ph->ph_unstable) {
5486                 unstable = 1;
5487         }
5488 
5489         pip = ph->ph_path_head;
5490         while (pip != NULL) {
5491                 MDI_PI_LOCK(pip);
5492                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5493 
5494                 /*
5495                  * if failover in progress fail the pHCI from offlining
5496                  */
5497                 ct = MDI_PI(pip)->pi_client;
5498                 i_mdi_client_lock(ct, pip);
5499                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5500                     (ct->ct_unstable)) {
5501                         unstable = 1;
5502                 }
5503                 MDI_PI_UNLOCK(pip);
5504 
5505                 /*
5506                  * Check to see of we are removing the last path of this
5507                  * client device...
5508                  */
5509                 cdip = ct->ct_dip;
5510                 if (!phci_only && cdip &&
5511                     (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5512                     (i_mdi_client_compute_state(ct, ph) ==
5513                     MDI_CLIENT_STATE_FAILED)) {
5514                         i_mdi_client_unlock(ct);
5515                         MDI_PHCI_UNLOCK(ph);
5516                         /*
5517                          * This is the last path to this client.
5518                          *
5519                          * Constraint will only be set to 1 if this client can
5520                          * be retired (as already determined by
5521                          * mdi_phci_retire_notify). However we don't actually
5522                          * need to retire the client (we just retire the last
5523                          * path - MPXIO will then fail all I/Os to the client).
5524                          * But we still need to call e_ddi_retire_finalize so
5525                          * the contract barriers can be cleared. Therefore we
5526                          * temporarily set constraint = 0 so that the client
5527                          * dip is not retired.
5528                          */
5529                         tmp_constraint = 0;
5530                         (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5531                         MDI_PHCI_LOCK(ph);
5532                         pip = next;
5533                 } else {
5534                         i_mdi_client_unlock(ct);
5535                         pip = next;
5536                 }
5537         }
5538 
5539         if (!phci_only && *((int *)constraint) == 0) {
5540                 MDI_PHCI_UNLOCK(ph);
5541                 return;
5542         }
5543 
5544         /*
5545          * Cannot offline pip(s)
5546          */
5547         if (unstable) {
5548                 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5549                     "pHCI in transient state, cannot retire",
5550                     ddi_driver_name(dip), ddi_get_instance(dip));
5551                 MDI_PHCI_UNLOCK(ph);
5552                 return;
5553         }
5554 
5555         /*
5556          * Mark the pHCI as offline
5557          */
5558         MDI_PHCI_SET_OFFLINE(ph);
5559 
5560         /*
5561          * Mark the child mdi_pathinfo nodes as transient
5562          */
5563         pip = ph->ph_path_head;
5564         while (pip != NULL) {
5565                 MDI_PI_LOCK(pip);
5566                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5567                 MDI_PI_SET_OFFLINING(pip);
5568                 MDI_PI_UNLOCK(pip);
5569                 pip = next;
5570         }
5571         MDI_PHCI_UNLOCK(ph);
5572         /*
5573          * Give a chance for any pending commands to execute
5574          */
5575         delay_random(mdi_delay);
5576         MDI_PHCI_LOCK(ph);
5577         pip = ph->ph_path_head;
5578         while (pip != NULL) {
5579                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5580                 (void) i_mdi_pi_offline(pip, 0);
5581                 MDI_PI_LOCK(pip);
5582                 ct = MDI_PI(pip)->pi_client;
5583                 if (!MDI_PI_IS_OFFLINE(pip)) {
5584                         cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5585                             "path %d %s busy, cannot offline",
5586                             mdi_pi_get_path_instance(pip),
5587                             mdi_pi_spathname(pip));
5588                         MDI_PI_UNLOCK(pip);
5589                         MDI_PHCI_SET_ONLINE(ph);
5590                         MDI_PHCI_UNLOCK(ph);
5591                         return;
5592                 }
5593                 MDI_PI_UNLOCK(pip);
5594                 pip = next;
5595         }
5596         MDI_PHCI_UNLOCK(ph);
5597 
5598         return;
5599 }
5600 
5601 void
5602 mdi_phci_unretire(dev_info_t *dip)
5603 {
5604         mdi_phci_t      *ph;
5605         mdi_pathinfo_t  *pip;
5606         mdi_pathinfo_t  *next;
5607 
5608         ASSERT(MDI_PHCI(dip));
5609 
5610         /*
5611          * Online the phci
5612          */
5613         i_mdi_phci_online(dip);
5614 
5615         ph = i_devi_get_phci(dip);
5616         MDI_PHCI_LOCK(ph);
5617         pip = ph->ph_path_head;
5618         while (pip != NULL) {
5619                 MDI_PI_LOCK(pip);
5620                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5621                 MDI_PI_UNLOCK(pip);
5622                 (void) i_mdi_pi_online(pip, 0);
5623                 pip = next;
5624         }
5625         MDI_PHCI_UNLOCK(ph);
5626 }
5627 
5628 /*ARGSUSED*/
5629 static int
5630 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5631 {
5632         int             rv = NDI_SUCCESS;
5633         mdi_client_t    *ct;
5634 
5635         /*
5636          * Client component to go offline.  Make sure that we are
5637          * not in failing over state and update client state
5638          * accordingly
5639          */
5640         ct = i_devi_get_client(dip);
5641         MDI_DEBUG(2, (MDI_NOTE, dip,
5642             "called %p %p", (void *)dip, (void *)ct));
5643         if (ct != NULL) {
5644                 MDI_CLIENT_LOCK(ct);
5645                 if (ct->ct_unstable) {
5646                         /*
5647                          * One or more paths are in transient state,
5648                          * Dont allow offline of a client device
5649                          */
5650                         MDI_DEBUG(1, (MDI_WARN, dip,
5651                             "!One or more paths to "
5652                             "this device are in transient state. "
5653                             "This device can not be removed at this moment. "
5654                             "Please try again later."));
5655                         MDI_CLIENT_UNLOCK(ct);
5656                         return (NDI_BUSY);
5657                 }
5658                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5659                         /*
5660                          * Failover is in progress, Dont allow DR of
5661                          * a client device
5662                          */
5663                         MDI_DEBUG(1, (MDI_WARN, dip,
5664                             "!Client device is Busy. "
5665                             "This device can not be removed at this moment. "
5666                             "Please try again later."));
5667                         MDI_CLIENT_UNLOCK(ct);
5668                         return (NDI_BUSY);
5669                 }
5670                 MDI_CLIENT_SET_OFFLINE(ct);
5671 
5672                 /*
5673                  * Unbind our relationship with the dev_info node
5674                  */
5675                 if (flags & NDI_DEVI_REMOVE) {
5676                         ct->ct_dip = NULL;
5677                 }
5678                 MDI_CLIENT_UNLOCK(ct);
5679         }
5680         return (rv);
5681 }
5682 
5683 /*
5684  * mdi_pre_attach():
5685  *              Pre attach() notification handler
5686  */
5687 /*ARGSUSED*/
5688 int
5689 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5690 {
5691         /* don't support old DDI_PM_RESUME */
5692         if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5693             (cmd == DDI_PM_RESUME))
5694                 return (DDI_FAILURE);
5695 
5696         return (DDI_SUCCESS);
5697 }
5698 
5699 /*
5700  * mdi_post_attach():
5701  *              Post attach() notification handler
5702  */
5703 /*ARGSUSED*/
5704 void
5705 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5706 {
5707         mdi_phci_t      *ph;
5708         mdi_client_t    *ct;
5709         mdi_vhci_t      *vh;
5710 
5711         if (MDI_PHCI(dip)) {
5712                 ph = i_devi_get_phci(dip);
5713                 ASSERT(ph != NULL);
5714 
5715                 MDI_PHCI_LOCK(ph);
5716                 switch (cmd) {
5717                 case DDI_ATTACH:
5718                         MDI_DEBUG(2, (MDI_NOTE, dip,
5719                             "phci post_attach called %p", (void *)ph));
5720                         if (error == DDI_SUCCESS) {
5721                                 MDI_PHCI_SET_ATTACH(ph);
5722                         } else {
5723                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5724                                     "!pHCI post_attach failed: error %d",
5725                                     error));
5726                                 MDI_PHCI_SET_DETACH(ph);
5727                         }
5728                         break;
5729 
5730                 case DDI_RESUME:
5731                         MDI_DEBUG(2, (MDI_NOTE, dip,
5732                             "pHCI post_resume: called %p", (void *)ph));
5733                         if (error == DDI_SUCCESS) {
5734                                 MDI_PHCI_SET_RESUME(ph);
5735                         } else {
5736                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5737                                     "!pHCI post_resume failed: error %d",
5738                                     error));
5739                                 MDI_PHCI_SET_SUSPEND(ph);
5740                         }
5741                         break;
5742                 }
5743                 MDI_PHCI_UNLOCK(ph);
5744         }
5745 
5746         if (MDI_CLIENT(dip)) {
5747                 ct = i_devi_get_client(dip);
5748                 ASSERT(ct != NULL);
5749 
5750                 MDI_CLIENT_LOCK(ct);
5751                 switch (cmd) {
5752                 case DDI_ATTACH:
5753                         MDI_DEBUG(2, (MDI_NOTE, dip,
5754                             "client post_attach called %p", (void *)ct));
5755                         if (error != DDI_SUCCESS) {
5756                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5757                                     "!client post_attach failed: error %d",
5758                                     error));
5759                                 MDI_CLIENT_SET_DETACH(ct);
5760                                 MDI_DEBUG(4, (MDI_WARN, dip,
5761                                     "i_mdi_pm_reset_client"));
5762                                 i_mdi_pm_reset_client(ct);
5763                                 break;
5764                         }
5765 
5766                         /*
5767                          * Client device has successfully attached, inform
5768                          * the vhci.
5769                          */
5770                         vh = ct->ct_vhci;
5771                         if (vh->vh_ops->vo_client_attached)
5772                                 (*vh->vh_ops->vo_client_attached)(dip);
5773 
5774                         MDI_CLIENT_SET_ATTACH(ct);
5775                         break;
5776 
5777                 case DDI_RESUME:
5778                         MDI_DEBUG(2, (MDI_NOTE, dip,
5779                             "client post_attach: called %p", (void *)ct));
5780                         if (error == DDI_SUCCESS) {
5781                                 MDI_CLIENT_SET_RESUME(ct);
5782                         } else {
5783                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5784                                     "!client post_resume failed: error %d",
5785                                     error));
5786                                 MDI_CLIENT_SET_SUSPEND(ct);
5787                         }
5788                         break;
5789                 }
5790                 MDI_CLIENT_UNLOCK(ct);
5791         }
5792 }
5793 
5794 /*
5795  * mdi_pre_detach():
5796  *              Pre detach notification handler
5797  */
5798 /*ARGSUSED*/
5799 int
5800 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5801 {
5802         int rv = DDI_SUCCESS;
5803 
5804         if (MDI_CLIENT(dip)) {
5805                 (void) i_mdi_client_pre_detach(dip, cmd);
5806         }
5807 
5808         if (MDI_PHCI(dip)) {
5809                 rv = i_mdi_phci_pre_detach(dip, cmd);
5810         }
5811 
5812         return (rv);
5813 }
5814 
/*
 * i_mdi_phci_pre_detach():
 *              pHCI half of the pre-detach notification.
 *
 *              DDI_DETACH:  fails unless the pHCI is already marked
 *              offline; otherwise flags the pHCI as detached.
 *              DDI_SUSPEND: suspends, via devi_detach(DDI_SUSPEND), every
 *              client reached through this pHCI's path list that is
 *              neither detached nor already suspended, then flags the
 *              pHCI as suspended.  If a client refuses, the clients on the
 *              paths ahead of the failing one that are flagged suspended
 *              are resumed again and the failure is returned.
 *              Any other command is rejected.
 *
 * Return Values:
 *              DDI_SUCCESS (also when dip carries no pHCI state)
 *              DDI_FAILURE, or the devi_detach() status of the client
 *              that could not be suspended
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        int             rv = DDI_SUCCESS;
        mdi_phci_t      *ph;
        mdi_client_t    *ct;
        mdi_pathinfo_t  *pip;
        mdi_pathinfo_t  *failed_pip = NULL;     /* path whose client failed */
        mdi_pathinfo_t  *next;

        ph = i_devi_get_phci(dip);
        if (ph == NULL) {
                /* No pHCI state attached to dip: nothing to do. */
                return (rv);
        }

        MDI_PHCI_LOCK(ph);
        switch (cmd) {
        case DDI_DETACH:
                MDI_DEBUG(2, (MDI_NOTE, dip,
                    "pHCI pre_detach: called %p", (void *)ph));
                if (!MDI_PHCI_IS_OFFLINE(ph)) {
                        /*
                         * mdi_pathinfo nodes are still attached to
                         * this pHCI. Fail the detach for this pHCI.
                         */
                        MDI_DEBUG(2, (MDI_WARN, dip,
                            "pHCI pre_detach: paths are still attached %p",
                            (void *)ph));
                        rv = DDI_FAILURE;
                        break;
                }
                MDI_PHCI_SET_DETACH(ph);
                break;

        case DDI_SUSPEND:
                /*
                 * pHCI is getting suspended.  Since mpxio client
                 * devices may not be suspended at this point, to avoid
                 * a potential stack overflow, it is important to suspend
                 * client devices before pHCI can be suspended.
                 *
                 * NOTE(review): the pHCI lock taken above is not dropped
                 * in this function before devi_detach()/devi_attach() are
                 * called below -- confirm that this is intended.
                 */

                MDI_DEBUG(2, (MDI_NOTE, dip,
                    "pHCI pre_suspend: called %p", (void *)ph));
                /*
                 * Suspend all the client devices accessible through this pHCI
                 */
                pip = ph->ph_path_head;
                while (pip != NULL && rv == DDI_SUCCESS) {
                        dev_info_t *cdip;
                        /*
                         * Capture the successor, the owning client and its
                         * dip while the path lock is held; the path lock is
                         * released once the client lock has been obtained.
                         */
                        MDI_PI_LOCK(pip);
                        next =
                            (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
                        ct = MDI_PI(pip)->pi_client;
                        i_mdi_client_lock(ct, pip);
                        cdip = ct->ct_dip;
                        MDI_PI_UNLOCK(pip);
                        if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
                            MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
                                /*
                                 * The client lock is released before the
                                 * client is asked to suspend.
                                 */
                                i_mdi_client_unlock(ct);
                                if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
                                    DDI_SUCCESS) {
                                        /*
                                         * Suspend of one of the client
                                         * device has failed.
                                         */
                                        MDI_DEBUG(1, (MDI_WARN, dip,
                                            "!suspend of device (%s%d) failed.",
                                            ddi_driver_name(cdip),
                                            ddi_get_instance(cdip)));
                                        /* Remember where rollback must stop */
                                        failed_pip = pip;
                                        break;
                                }
                        } else {
                                /* Already detached or suspended: skip it. */
                                i_mdi_client_unlock(ct);
                        }
                        pip = next;
                }

                if (rv == DDI_SUCCESS) {
                        /*
                         * Suspend of client devices is complete. Proceed
                         * with pHCI suspend.
                         */
                        MDI_PHCI_SET_SUSPEND(ph);
                } else {
                        /*
                         * Roll back: walk the path list from its head up
                         * to (not including) the path whose client failed
                         * and resume every client flagged as suspended.
                         *
                         * NOTE(review): the test below does not tell apart
                         * clients suspended by the loop above from clients
                         * that were already suspended before this call.
                         */
                        pip = ph->ph_path_head;
                        while (pip != failed_pip) {
                                dev_info_t *cdip;
                                MDI_PI_LOCK(pip);
                                next =
                                    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
                                ct = MDI_PI(pip)->pi_client;
                                i_mdi_client_lock(ct, pip);
                                cdip = ct->ct_dip;
                                MDI_PI_UNLOCK(pip);
                                if (MDI_CLIENT_IS_SUSPENDED(ct)) {
                                        i_mdi_client_unlock(ct);
                                        /* Resume status is not checked. */
                                        (void) devi_attach(cdip, DDI_RESUME);
                                } else {
                                        i_mdi_client_unlock(ct);
                                }
                                pip = next;
                        }
                }
                break;

        default:
                /* Only DDI_DETACH and DDI_SUSPEND are handled here. */
                rv = DDI_FAILURE;
                break;
        }
        MDI_PHCI_UNLOCK(ph);
        return (rv);
}
5934 
5935 /*ARGSUSED*/
5936 static int
5937 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5938 {
5939         int             rv = DDI_SUCCESS;
5940         mdi_client_t    *ct;
5941 
5942         ct = i_devi_get_client(dip);
5943         if (ct == NULL) {
5944                 return (rv);
5945         }
5946 
5947         MDI_CLIENT_LOCK(ct);
5948         switch (cmd) {
5949         case DDI_DETACH:
5950                 MDI_DEBUG(2, (MDI_NOTE, dip,
5951                     "client pre_detach: called %p",
5952                      (void *)ct));
5953                 MDI_CLIENT_SET_DETACH(ct);
5954                 break;
5955 
5956         case DDI_SUSPEND:
5957                 MDI_DEBUG(2, (MDI_NOTE, dip,
5958                     "client pre_suspend: called %p",
5959                     (void *)ct));
5960                 MDI_CLIENT_SET_SUSPEND(ct);
5961                 break;
5962 
5963         default:
5964                 rv = DDI_FAILURE;
5965                 break;
5966         }
5967         MDI_CLIENT_UNLOCK(ct);
5968         return (rv);
5969 }
5970 
5971 /*
5972  * mdi_post_detach():
5973  *              Post detach notification handler
5974  */
5975 /*ARGSUSED*/
5976 void
5977 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5978 {
5979         /*
5980          * Detach/Suspend of mpxio component failed. Update our state
5981          * too
5982          */
5983         if (MDI_PHCI(dip))
5984                 i_mdi_phci_post_detach(dip, cmd, error);
5985 
5986         if (MDI_CLIENT(dip))
5987                 i_mdi_client_post_detach(dip, cmd, error);
5988 }
5989 
5990 /*ARGSUSED*/
5991 static void
5992 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5993 {
5994         mdi_phci_t      *ph;
5995 
5996         /*
5997          * Detach/Suspend of phci component failed. Update our state
5998          * too
5999          */
6000         ph = i_devi_get_phci(dip);
6001         if (ph == NULL) {
6002                 return;
6003         }
6004 
6005         MDI_PHCI_LOCK(ph);
6006         /*
6007          * Detach of pHCI failed. Restore back converse
6008          * state
6009          */
6010         switch (cmd) {
6011         case DDI_DETACH:
6012                 MDI_DEBUG(2, (MDI_NOTE, dip,
6013                     "pHCI post_detach: called %p",
6014                     (void *)ph));
6015                 if (error != DDI_SUCCESS)
6016                         MDI_PHCI_SET_ATTACH(ph);
6017                 break;
6018 
6019         case DDI_SUSPEND:
6020                 MDI_DEBUG(2, (MDI_NOTE, dip,
6021                     "pHCI post_suspend: called %p",
6022                     (void *)ph));
6023                 if (error != DDI_SUCCESS)
6024                         MDI_PHCI_SET_RESUME(ph);
6025                 break;
6026         }
6027         MDI_PHCI_UNLOCK(ph);
6028 }
6029 
6030 /*ARGSUSED*/
6031 static void
6032 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6033 {
6034         mdi_client_t    *ct;
6035 
6036         ct = i_devi_get_client(dip);
6037         if (ct == NULL) {
6038                 return;
6039         }
6040         MDI_CLIENT_LOCK(ct);
6041         /*
6042          * Detach of Client failed. Restore back converse
6043          * state
6044          */
6045         switch (cmd) {
6046         case DDI_DETACH:
6047                 MDI_DEBUG(2, (MDI_NOTE, dip,
6048                     "client post_detach: called %p", (void *)ct));
6049                 if (DEVI_IS_ATTACHING(dip)) {
6050                         MDI_DEBUG(4, (MDI_NOTE, dip,
6051                             "i_mdi_pm_rele_client\n"));
6052                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6053                 } else {
6054                         MDI_DEBUG(4, (MDI_NOTE, dip,
6055                             "i_mdi_pm_reset_client\n"));
6056                         i_mdi_pm_reset_client(ct);
6057                 }
6058                 if (error != DDI_SUCCESS)
6059                         MDI_CLIENT_SET_ATTACH(ct);
6060                 break;
6061 
6062         case DDI_SUSPEND:
6063                 MDI_DEBUG(2, (MDI_NOTE, dip,
6064                     "called %p", (void *)ct));
6065                 if (error != DDI_SUCCESS)
6066                         MDI_CLIENT_SET_RESUME(ct);
6067                 break;
6068         }
6069         MDI_CLIENT_UNLOCK(ct);
6070 }
6071 
6072 int
6073 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6074 {
6075         return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6076 }
6077 
6078 /*
6079  * create and install per-path (client - pHCI) statistics
6080  * I/O stats supported: nread, nwritten, reads, and writes
6081  * Error stats - hard errors, soft errors, & transport errors
6082  */
6083 int
6084 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6085 {
6086         kstat_t                 *kiosp, *kerrsp;
6087         struct pi_errs          *nsp;
6088         struct mdi_pi_kstats    *mdi_statp;
6089 
6090         if (MDI_PI(pip)->pi_kstats != NULL)
6091                 return (MDI_SUCCESS);
6092 
6093         if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6094             KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6095                 return (MDI_FAILURE);
6096         }
6097 
6098         (void) strcat(ksname, ",err");
6099         kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6100             KSTAT_TYPE_NAMED,
6101             sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6102         if (kerrsp == NULL) {
6103                 kstat_delete(kiosp);
6104                 return (MDI_FAILURE);
6105         }
6106 
6107         nsp = (struct pi_errs *)kerrsp->ks_data;
6108         kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6109         kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6110         kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6111             KSTAT_DATA_UINT32);
6112         kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6113             KSTAT_DATA_UINT32);
6114         kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6115             KSTAT_DATA_UINT32);
6116         kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6117             KSTAT_DATA_UINT32);
6118         kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6119             KSTAT_DATA_UINT32);
6120         kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6121             KSTAT_DATA_UINT32);
6122         kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6123             KSTAT_DATA_UINT32);
6124         kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6125 
6126         mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6127         mdi_statp->pi_kstat_ref = 1;
6128         mdi_statp->pi_kstat_iostats = kiosp;
6129         mdi_statp->pi_kstat_errstats = kerrsp;
6130         kstat_install(kiosp);
6131         kstat_install(kerrsp);
6132         MDI_PI(pip)->pi_kstats = mdi_statp;
6133         return (MDI_SUCCESS);
6134 }
6135 
6136 /*
6137  * destroy per-path properties
6138  */
6139 static void
6140 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6141 {
6142 
6143         struct mdi_pi_kstats *mdi_statp;
6144 
6145         if (MDI_PI(pip)->pi_kstats == NULL)
6146                 return;
6147         if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6148                 return;
6149 
6150         MDI_PI(pip)->pi_kstats = NULL;
6151 
6152         /*
6153          * the kstat may be shared between multiple pathinfo nodes
6154          * decrement this pathinfo's usage, removing the kstats
6155          * themselves when the last pathinfo reference is removed.
6156          */
6157         ASSERT(mdi_statp->pi_kstat_ref > 0);
6158         if (--mdi_statp->pi_kstat_ref != 0)
6159                 return;
6160 
6161         kstat_delete(mdi_statp->pi_kstat_iostats);
6162         kstat_delete(mdi_statp->pi_kstat_errstats);
6163         kmem_free(mdi_statp, sizeof (*mdi_statp));
6164 }
6165 
6166 /*
6167  * update I/O paths KSTATS
6168  */
6169 void
6170 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6171 {
6172         kstat_t *iostatp;
6173         size_t xfer_cnt;
6174 
6175         ASSERT(pip != NULL);
6176 
6177         /*
6178          * I/O can be driven across a path prior to having path
6179          * statistics available, i.e. probe(9e).
6180          */
6181         if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6182                 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6183                 xfer_cnt = bp->b_bcount - bp->b_resid;
6184                 if (bp->b_flags & B_READ) {
6185                         KSTAT_IO_PTR(iostatp)->reads++;
6186                         KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6187                 } else {
6188                         KSTAT_IO_PTR(iostatp)->writes++;
6189                         KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6190                 }
6191         }
6192 }
6193 
6194 /*
6195  * Enable the path(specific client/target/initiator)
6196  * Enabling a path means that MPxIO may select the enabled path for routing
6197  * future I/O requests, subject to other path state constraints.
6198  */
6199 int
6200 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6201 {
6202         mdi_phci_t      *ph;
6203 
6204         ph = MDI_PI(pip)->pi_phci;
6205         if (ph == NULL) {
6206                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6207                     "!failed: path %s %p: NULL ph",
6208                     mdi_pi_spathname(pip), (void *)pip));
6209                 return (MDI_FAILURE);
6210         }
6211 
6212         (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6213                 MDI_ENABLE_OP);
6214         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6215             "!returning success pip = %p. ph = %p",
6216             (void *)pip, (void *)ph));
6217         return (MDI_SUCCESS);
6218 
6219 }
6220 
6221 /*
6222  * Disable the path (specific client/target/initiator)
6223  * Disabling a path means that MPxIO will not select the disabled path for
6224  * routing any new I/O requests.
6225  */
6226 int
6227 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6228 {
6229         mdi_phci_t      *ph;
6230 
6231         ph = MDI_PI(pip)->pi_phci;
6232         if (ph == NULL) {
6233                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6234                     "!failed: path %s %p: NULL ph",
6235                     mdi_pi_spathname(pip), (void *)pip));
6236                 return (MDI_FAILURE);
6237         }
6238 
6239         (void) i_mdi_enable_disable_path(pip,
6240             ph->ph_vhci, flags, MDI_DISABLE_OP);
6241         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6242             "!returning success pip = %p. ph = %p",
6243             (void *)pip, (void *)ph));
6244         return (MDI_SUCCESS);
6245 }
6246 
6247 /*
6248  * disable the path to a particular pHCI (pHCI specified in the phci_path
6249  * argument) for a particular client (specified in the client_path argument).
6250  * Disabling a path means that MPxIO will not select the disabled path for
6251  * routing any new I/O requests.
6252  * NOTE: this will be removed once the NWS files are changed to use the new
6253  * mdi_{enable,disable}_path interfaces
6254  */
6255 int
6256 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6257 {
6258         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6259 }
6260 
6261 /*
6262  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6263  * argument) for a particular client (specified in the client_path argument).
6264  * Enabling a path means that MPxIO may select the enabled path for routing
6265  * future I/O requests, subject to other path state constraints.
6266  * NOTE: this will be removed once the NWS files are changed to use the new
6267  * mdi_{enable,disable}_path interfaces
6268  */
6269 
6270 int
6271 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6272 {
6273         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6274 }
6275 
6276 /*
6277  * Common routine for doing enable/disable.
6278  */
6279 static mdi_pathinfo_t *
6280 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6281                 int op)
6282 {
6283         int             sync_flag = 0;
6284         int             rv;
6285         mdi_pathinfo_t  *next;
6286         int             (*f)() = NULL;
6287 
6288         /*
6289          * Check to make sure the path is not already in the
6290          * requested state. If it is just return the next path
6291          * as we have nothing to do here.
6292          */
6293         if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6294             (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6295                 MDI_PI_LOCK(pip);
6296                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6297                 MDI_PI_UNLOCK(pip);
6298                 return (next);
6299         }
6300 
6301         f = vh->vh_ops->vo_pi_state_change;
6302 
6303         sync_flag = (flags << 8) & 0xf00;
6304 
6305         /*
6306          * Do a callback into the mdi consumer to let it
6307          * know that path is about to get enabled/disabled.
6308          */
6309         if (f != NULL) {
6310                 rv = (*f)(vh->vh_dip, pip, 0,
6311                         MDI_PI_EXT_STATE(pip),
6312                         MDI_EXT_STATE_CHANGE | sync_flag |
6313                         op | MDI_BEFORE_STATE_CHANGE);
6314                 if (rv != MDI_SUCCESS) {
6315                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6316                             "vo_pi_state_change: failed rv = %x", rv));
6317                 }
6318         }
6319         MDI_PI_LOCK(pip);
6320         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6321 
6322         switch (flags) {
6323                 case USER_DISABLE:
6324                         if (op == MDI_DISABLE_OP) {
6325                                 MDI_PI_SET_USER_DISABLE(pip);
6326                         } else {
6327                                 MDI_PI_SET_USER_ENABLE(pip);
6328                         }
6329                         break;
6330                 case DRIVER_DISABLE:
6331                         if (op == MDI_DISABLE_OP) {
6332                                 MDI_PI_SET_DRV_DISABLE(pip);
6333                         } else {
6334                                 MDI_PI_SET_DRV_ENABLE(pip);
6335                         }
6336                         break;
6337                 case DRIVER_DISABLE_TRANSIENT:
6338                         if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6339                                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6340                         } else {
6341                                 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6342                         }
6343                         break;
6344         }
6345         MDI_PI_UNLOCK(pip);
6346         /*
6347          * Do a callback into the mdi consumer to let it
6348          * know that path is now enabled/disabled.
6349          */
6350         if (f != NULL) {
6351                 rv = (*f)(vh->vh_dip, pip, 0,
6352                         MDI_PI_EXT_STATE(pip),
6353                         MDI_EXT_STATE_CHANGE | sync_flag |
6354                         op | MDI_AFTER_STATE_CHANGE);
6355                 if (rv != MDI_SUCCESS) {
6356                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6357                             "vo_pi_state_change failed: rv = %x", rv));
6358                 }
6359         }
6360         return (next);
6361 }
6362 
6363 /*
6364  * Common routine for doing enable/disable.
6365  * NOTE: this will be removed once the NWS files are changed to use the new
6366  * mdi_{enable,disable}_path has been putback
6367  */
6368 int
6369 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6370 {
6371 
6372         mdi_phci_t      *ph;
6373         mdi_vhci_t      *vh = NULL;
6374         mdi_client_t    *ct;
6375         mdi_pathinfo_t  *next, *pip;
6376         int             found_it;
6377 
6378         ph = i_devi_get_phci(pdip);
6379         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6380             "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6381             (void *)cdip));
6382         if (ph == NULL) {
6383                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6384                     "!failed: operation %d: NULL ph", op));
6385                 return (MDI_FAILURE);
6386         }
6387 
6388         if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6389                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6390                     "!failed: invalid operation %d", op));
6391                 return (MDI_FAILURE);
6392         }
6393 
6394         vh = ph->ph_vhci;
6395 
6396         if (cdip == NULL) {
6397                 /*
6398                  * Need to mark the Phci as enabled/disabled.
6399                  */
6400                 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6401                     "op %d for the phci", op));
6402                 MDI_PHCI_LOCK(ph);
6403                 switch (flags) {
6404                         case USER_DISABLE:
6405                                 if (op == MDI_DISABLE_OP) {
6406                                         MDI_PHCI_SET_USER_DISABLE(ph);
6407                                 } else {
6408                                         MDI_PHCI_SET_USER_ENABLE(ph);
6409                                 }
6410                                 break;
6411                         case DRIVER_DISABLE:
6412                                 if (op == MDI_DISABLE_OP) {
6413                                         MDI_PHCI_SET_DRV_DISABLE(ph);
6414                                 } else {
6415                                         MDI_PHCI_SET_DRV_ENABLE(ph);
6416                                 }
6417                                 break;
6418                         case DRIVER_DISABLE_TRANSIENT:
6419                                 if (op == MDI_DISABLE_OP) {
6420                                         MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6421                                 } else {
6422                                         MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6423                                 }
6424                                 break;
6425                         default:
6426                                 MDI_PHCI_UNLOCK(ph);
6427                                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6428                                     "!invalid flag argument= %d", flags));
6429                 }
6430 
6431                 /*
6432                  * Phci has been disabled. Now try to enable/disable
6433                  * path info's to each client.
6434                  */
6435                 pip = ph->ph_path_head;
6436                 while (pip != NULL) {
6437                         pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6438                 }
6439                 MDI_PHCI_UNLOCK(ph);
6440         } else {
6441 
6442                 /*
6443                  * Disable a specific client.
6444                  */
6445                 ct = i_devi_get_client(cdip);
6446                 if (ct == NULL) {
6447                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6448                             "!failed: operation = %d: NULL ct", op));
6449                         return (MDI_FAILURE);
6450                 }
6451 
6452                 MDI_CLIENT_LOCK(ct);
6453                 pip = ct->ct_path_head;
6454                 found_it = 0;
6455                 while (pip != NULL) {
6456                         MDI_PI_LOCK(pip);
6457                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6458                         if (MDI_PI(pip)->pi_phci == ph) {
6459                                 MDI_PI_UNLOCK(pip);
6460                                 found_it = 1;
6461                                 break;
6462                         }
6463                         MDI_PI_UNLOCK(pip);
6464                         pip = next;
6465                 }
6466 
6467 
6468                 MDI_CLIENT_UNLOCK(ct);
6469                 if (found_it == 0) {
6470                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6471                             "!failed. Could not find corresponding pip\n"));
6472                         return (MDI_FAILURE);
6473                 }
6474 
6475                 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6476         }
6477 
6478         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6479             "!op %d returning success pdip = %p cdip = %p",
6480             op, (void *)pdip, (void *)cdip));
6481         return (MDI_SUCCESS);
6482 }
6483 
6484 /*
6485  * Ensure phci powered up
6486  */
6487 static void
6488 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6489 {
6490         dev_info_t      *ph_dip;
6491 
6492         ASSERT(pip != NULL);
6493         ASSERT(MDI_PI_LOCKED(pip));
6494 
6495         if (MDI_PI(pip)->pi_pm_held) {
6496                 return;
6497         }
6498 
6499         ph_dip = mdi_pi_get_phci(pip);
6500         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6501             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6502         if (ph_dip == NULL) {
6503                 return;
6504         }
6505 
6506         MDI_PI_UNLOCK(pip);
6507         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6508             DEVI(ph_dip)->devi_pm_kidsupcnt));
6509         pm_hold_power(ph_dip);
6510         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6511             DEVI(ph_dip)->devi_pm_kidsupcnt));
6512         MDI_PI_LOCK(pip);
6513 
6514         /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6515         if (DEVI(ph_dip)->devi_pm_info)
6516                 MDI_PI(pip)->pi_pm_held = 1;
6517 }
6518 
6519 /*
6520  * Allow phci powered down
6521  */
6522 static void
6523 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6524 {
6525         dev_info_t      *ph_dip = NULL;
6526 
6527         ASSERT(pip != NULL);
6528         ASSERT(MDI_PI_LOCKED(pip));
6529 
6530         if (MDI_PI(pip)->pi_pm_held == 0) {
6531                 return;
6532         }
6533 
6534         ph_dip = mdi_pi_get_phci(pip);
6535         ASSERT(ph_dip != NULL);
6536 
6537         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6538             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6539 
6540         MDI_PI_UNLOCK(pip);
6541         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6542             "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6543         pm_rele_power(ph_dip);
6544         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6545             "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6546         MDI_PI_LOCK(pip);
6547 
6548         MDI_PI(pip)->pi_pm_held = 0;
6549 }
6550 
6551 static void
6552 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6553 {
6554         ASSERT(MDI_CLIENT_LOCKED(ct));
6555 
6556         ct->ct_power_cnt += incr;
6557         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6558             "%p ct_power_cnt = %d incr = %d",
6559             (void *)ct, ct->ct_power_cnt, incr));
6560         ASSERT(ct->ct_power_cnt >= 0);
6561 }
6562 
6563 static void
6564 i_mdi_rele_all_phci(mdi_client_t *ct)
6565 {
6566         mdi_pathinfo_t  *pip;
6567 
6568         ASSERT(MDI_CLIENT_LOCKED(ct));
6569         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6570         while (pip != NULL) {
6571                 mdi_hold_path(pip);
6572                 MDI_PI_LOCK(pip);
6573                 i_mdi_pm_rele_pip(pip);
6574                 MDI_PI_UNLOCK(pip);
6575                 mdi_rele_path(pip);
6576                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6577         }
6578 }
6579 
6580 static void
6581 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6582 {
6583         ASSERT(MDI_CLIENT_LOCKED(ct));
6584 
6585         if (i_ddi_devi_attached(ct->ct_dip)) {
6586                 ct->ct_power_cnt -= decr;
6587                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6588                     "%p ct_power_cnt = %d decr = %d",
6589                     (void *)ct, ct->ct_power_cnt, decr));
6590         }
6591 
6592         ASSERT(ct->ct_power_cnt >= 0);
6593         if (ct->ct_power_cnt == 0) {
6594                 i_mdi_rele_all_phci(ct);
6595                 return;
6596         }
6597 }
6598 
6599 static void
6600 i_mdi_pm_reset_client(mdi_client_t *ct)
6601 {
6602         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6603             "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6604         ASSERT(MDI_CLIENT_LOCKED(ct));
6605         ct->ct_power_cnt = 0;
6606         i_mdi_rele_all_phci(ct);
6607         ct->ct_powercnt_config = 0;
6608         ct->ct_powercnt_unconfig = 0;
6609         ct->ct_powercnt_reset = 1;
6610 }
6611 
6612 static int
6613 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6614 {
6615         int             ret;
6616         dev_info_t      *ph_dip;
6617 
6618         MDI_PI_LOCK(pip);
6619         i_mdi_pm_hold_pip(pip);
6620 
6621         ph_dip = mdi_pi_get_phci(pip);
6622         MDI_PI_UNLOCK(pip);
6623 
6624         /* bring all components of phci to full power */
6625         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6626             "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6627             ddi_get_instance(ph_dip), (void *)pip));
6628 
6629         ret = pm_powerup(ph_dip);
6630 
6631         if (ret == DDI_FAILURE) {
6632                 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6633                     "pm_powerup FAILED for %s%d %p",
6634                     ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6635                     (void *)pip));
6636 
6637                 MDI_PI_LOCK(pip);
6638                 i_mdi_pm_rele_pip(pip);
6639                 MDI_PI_UNLOCK(pip);
6640                 return (MDI_FAILURE);
6641         }
6642 
6643         return (MDI_SUCCESS);
6644 }
6645 
6646 static int
6647 i_mdi_power_all_phci(mdi_client_t *ct)
6648 {
6649         mdi_pathinfo_t  *pip;
6650         int             succeeded = 0;
6651 
6652         ASSERT(MDI_CLIENT_LOCKED(ct));
6653         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6654         while (pip != NULL) {
6655                 /*
6656                  * Don't power if MDI_PATHINFO_STATE_FAULT
6657                  * or MDI_PATHINFO_STATE_OFFLINE.
6658                  */
6659                 if (MDI_PI_IS_INIT(pip) ||
6660                     MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6661                         mdi_hold_path(pip);
6662                         MDI_CLIENT_UNLOCK(ct);
6663                         if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6664                                 succeeded = 1;
6665 
6666                         ASSERT(ct == MDI_PI(pip)->pi_client);
6667                         MDI_CLIENT_LOCK(ct);
6668                         mdi_rele_path(pip);
6669                 }
6670                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6671         }
6672 
6673         return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6674 }
6675 
/*
 * mdi_bus_power():
 *              1. Place the phci(s) into powered up state so that
 *                 client can do power management
 *              2. Ensure phci powered up as client power managing
 *
 *              Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *              and BUS_POWER_HAS_CHANGED are processed here.  For the two
 *              notification ops 'arg' is read as a pm_bp_child_pwrchg_t, for
 *              BUS_POWER_HAS_CHANGED as a pm_bp_has_changed_t.  'result' is
 *              read as an int in the post-notification case.  Any other op
 *              (except BUS_POWER_NOINVOL, which is rejected) is handed to
 *              pm_busop_bus_power() and its return value is returned.
 *
 *              A pre-notification marks the client as being in a power
 *              transition; the matching post-notification clears the mark
 *              and wakes up waiters on ct_powerchange_cv.
 * Return Values:
 *              MDI_SUCCESS
 *              MDI_FAILURE     BUS_POWER_NOINVOL, no client found for the
 *                              child, or i_mdi_power_all_phci() failed
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
        int                     ret = MDI_SUCCESS;
        pm_bp_child_pwrchg_t    *bpc;
        mdi_client_t            *ct;
        dev_info_t              *cdip;
        pm_bp_has_changed_t     *bphc;

        /*
         * BUS_POWER_NOINVOL not supported
         */
        if (op == BUS_POWER_NOINVOL)
                return (MDI_FAILURE);

        /*
         * Pick the child dip out of the op specific argument.  Ops that
         * need no client processing are passed straight through to
         * pm_busop_bus_power(), which saves cpu cycles on the ct lookup
         * and locking below.
         */
        switch (op) {
        case BUS_POWER_PRE_NOTIFICATION:
        case BUS_POWER_POST_NOTIFICATION:
                bpc = (pm_bp_child_pwrchg_t *)arg;
                cdip = bpc->bpc_dip;
                break;
        case BUS_POWER_HAS_CHANGED:
                bphc = (pm_bp_has_changed_t *)arg;
                cdip = bphc->bphc_dip;
                break;
        default:
                return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
        }

        ASSERT(MDI_CLIENT(cdip));

        ct = i_devi_get_client(cdip);
        if (ct == NULL)
                return (MDI_FAILURE);

        /*
         * wait till the mdi_pathinfo node state change are processed
         */
        MDI_CLIENT_LOCK(ct);
        switch (op) {
        case BUS_POWER_PRE_NOTIFICATION:
                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                    "BUS_POWER_PRE_NOTIFICATION:"
                    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
                    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
                    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

                /* serialize power level change per client */
                while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
                        cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

                /* cleared again by the post-notification case below */
                MDI_CLIENT_SET_POWER_TRANSITION(ct);

                /* no hold outstanding yet: power up the phci(s) first */
                if (ct->ct_power_cnt == 0) {
                        ret = i_mdi_power_all_phci(ct);
                }

                /*
                 * if new_level > 0:
                 *      - hold phci(s)
                 *      - power up phci(s) if not already
                 * ignore power down
                 */
                if (bpc->bpc_nlevel > 0) {
                        if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
                                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                                    "i_mdi_pm_hold_client\n"));
                                i_mdi_pm_hold_client(ct, ct->ct_path_count);
                        }
                }
                break;
        case BUS_POWER_POST_NOTIFICATION:
                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                    "BUS_POWER_POST_NOTIFICATION:"
                    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
                    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
                    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
                    *(int *)result));

                /* record the client's new power state on success */
                if (*(int *)result == DDI_SUCCESS) {
                        if (bpc->bpc_nlevel > 0) {
                                MDI_CLIENT_SET_POWER_UP(ct);
                        } else {
                                MDI_CLIENT_SET_POWER_DOWN(ct);
                        }
                }

                /* release the hold we did in pre-notification */
                if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
                    !DEVI_IS_ATTACHING(ct->ct_dip)) {
                        MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                            "i_mdi_pm_rele_client\n"));
                        i_mdi_pm_rele_client(ct, ct->ct_path_count);
                }

                /* successful power down */
                if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
                        /* another thread might started attaching */
                        if (DEVI_IS_ATTACHING(ct->ct_dip)) {
                                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                                    "i_mdi_pm_rele_client\n"));
                                i_mdi_pm_rele_client(ct, ct->ct_path_count);
                        /* detaching has been taken care in pm_post_unconfig */
                        } else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
                                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                                    "i_mdi_pm_reset_client\n"));
                                i_mdi_pm_reset_client(ct);
                        }
                }

                /* transition is over: let waiting threads proceed */
                MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
                cv_broadcast(&ct->ct_powerchange_cv);

                break;

        /* need to do more */
        case BUS_POWER_HAS_CHANGED:
                MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
                    "BUS_POWER_HAS_CHANGED:"
                    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
                    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
                    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

                /* level was raised: power up (if needed) and hold phci(s) */
                if (bphc->bphc_nlevel > 0 &&
                    bphc->bphc_nlevel > bphc->bphc_olevel) {
                        if (ct->ct_power_cnt == 0) {
                                ret = i_mdi_power_all_phci(ct);
                        }
                        MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
                            "i_mdi_pm_hold_client\n"));
                        i_mdi_pm_hold_client(ct, ct->ct_path_count);
                }

                /* dropped to level 0 from a level other than -1: release */
                if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
                        MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
                            "i_mdi_pm_rele_client\n"));
                        i_mdi_pm_rele_client(ct, ct->ct_path_count);
                }
                break;
        }

        MDI_CLIENT_UNLOCK(ct);
        return (ret);
}
6833 
6834 static int
6835 i_mdi_pm_pre_config_one(dev_info_t *child)
6836 {
6837         int             ret = MDI_SUCCESS;
6838         mdi_client_t    *ct;
6839 
6840         ct = i_devi_get_client(child);
6841         if (ct == NULL)
6842                 return (MDI_FAILURE);
6843 
6844         MDI_CLIENT_LOCK(ct);
6845         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6846                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6847 
6848         if (!MDI_CLIENT_IS_FAILED(ct)) {
6849                 MDI_CLIENT_UNLOCK(ct);
6850                 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6851                 return (MDI_SUCCESS);
6852         }
6853 
6854         if (ct->ct_powercnt_config) {
6855                 MDI_CLIENT_UNLOCK(ct);
6856                 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6857                 return (MDI_SUCCESS);
6858         }
6859 
6860         if (ct->ct_power_cnt == 0) {
6861                 ret = i_mdi_power_all_phci(ct);
6862         }
6863         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6864         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6865         ct->ct_powercnt_config = 1;
6866         ct->ct_powercnt_reset = 0;
6867         MDI_CLIENT_UNLOCK(ct);
6868         return (ret);
6869 }
6870 
6871 static int
6872 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6873 {
6874         int                     ret = MDI_SUCCESS;
6875         dev_info_t              *cdip;
6876         int                     circ;
6877 
6878         ASSERT(MDI_VHCI(vdip));
6879 
6880         /* ndi_devi_config_one */
6881         if (child) {
6882                 ASSERT(DEVI_BUSY_OWNED(vdip));
6883                 return (i_mdi_pm_pre_config_one(child));
6884         }
6885 
6886         /* devi_config_common */
6887         ndi_devi_enter(vdip, &circ);
6888         cdip = ddi_get_child(vdip);
6889         while (cdip) {
6890                 dev_info_t *next = ddi_get_next_sibling(cdip);
6891 
6892                 ret = i_mdi_pm_pre_config_one(cdip);
6893                 if (ret != MDI_SUCCESS)
6894                         break;
6895                 cdip = next;
6896         }
6897         ndi_devi_exit(vdip, circ);
6898         return (ret);
6899 }
6900 
6901 static int
6902 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6903 {
6904         int             ret = MDI_SUCCESS;
6905         mdi_client_t    *ct;
6906 
6907         ct = i_devi_get_client(child);
6908         if (ct == NULL)
6909                 return (MDI_FAILURE);
6910 
6911         MDI_CLIENT_LOCK(ct);
6912         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6913                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6914 
6915         if (!i_ddi_devi_attached(child)) {
6916                 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6917                 MDI_CLIENT_UNLOCK(ct);
6918                 return (MDI_SUCCESS);
6919         }
6920 
6921         if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6922             (flags & NDI_AUTODETACH)) {
6923                 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6924                 MDI_CLIENT_UNLOCK(ct);
6925                 return (MDI_FAILURE);
6926         }
6927 
6928         if (ct->ct_powercnt_unconfig) {
6929                 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6930                 MDI_CLIENT_UNLOCK(ct);
6931                 *held = 1;
6932                 return (MDI_SUCCESS);
6933         }
6934 
6935         if (ct->ct_power_cnt == 0) {
6936                 ret = i_mdi_power_all_phci(ct);
6937         }
6938         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6939         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6940         ct->ct_powercnt_unconfig = 1;
6941         ct->ct_powercnt_reset = 0;
6942         MDI_CLIENT_UNLOCK(ct);
6943         if (ret == MDI_SUCCESS)
6944                 *held = 1;
6945         return (ret);
6946 }
6947 
6948 static int
6949 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6950     int flags)
6951 {
6952         int                     ret = MDI_SUCCESS;
6953         dev_info_t              *cdip;
6954         int                     circ;
6955 
6956         ASSERT(MDI_VHCI(vdip));
6957         *held = 0;
6958 
6959         /* ndi_devi_unconfig_one */
6960         if (child) {
6961                 ASSERT(DEVI_BUSY_OWNED(vdip));
6962                 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6963         }
6964 
6965         /* devi_unconfig_common */
6966         ndi_devi_enter(vdip, &circ);
6967         cdip = ddi_get_child(vdip);
6968         while (cdip) {
6969                 dev_info_t *next = ddi_get_next_sibling(cdip);
6970 
6971                 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6972                 cdip = next;
6973         }
6974         ndi_devi_exit(vdip, circ);
6975 
6976         if (*held)
6977                 ret = MDI_SUCCESS;
6978 
6979         return (ret);
6980 }
6981 
6982 static void
6983 i_mdi_pm_post_config_one(dev_info_t *child)
6984 {
6985         mdi_client_t    *ct;
6986 
6987         ct = i_devi_get_client(child);
6988         if (ct == NULL)
6989                 return;
6990 
6991         MDI_CLIENT_LOCK(ct);
6992         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6993                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6994 
6995         if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6996                 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6997                 MDI_CLIENT_UNLOCK(ct);
6998                 return;
6999         }
7000 
7001         /* client has not been updated */
7002         if (MDI_CLIENT_IS_FAILED(ct)) {
7003                 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
7004                 MDI_CLIENT_UNLOCK(ct);
7005                 return;
7006         }
7007 
7008         /* another thread might have powered it down or detached it */
7009         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7010             !DEVI_IS_ATTACHING(child)) ||
7011             (!i_ddi_devi_attached(child) &&
7012             !DEVI_IS_ATTACHING(child))) {
7013                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7014                 i_mdi_pm_reset_client(ct);
7015         } else {
7016                 mdi_pathinfo_t  *pip, *next;
7017                 int     valid_path_count = 0;
7018 
7019                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7020                 pip = ct->ct_path_head;
7021                 while (pip != NULL) {
7022                         MDI_PI_LOCK(pip);
7023                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7024                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7025                                 valid_path_count ++;
7026                         MDI_PI_UNLOCK(pip);
7027                         pip = next;
7028                 }
7029                 i_mdi_pm_rele_client(ct, valid_path_count);
7030         }
7031         ct->ct_powercnt_config = 0;
7032         MDI_CLIENT_UNLOCK(ct);
7033 }
7034 
7035 static void
7036 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7037 {
7038         int             circ;
7039         dev_info_t      *cdip;
7040 
7041         ASSERT(MDI_VHCI(vdip));
7042 
7043         /* ndi_devi_config_one */
7044         if (child) {
7045                 ASSERT(DEVI_BUSY_OWNED(vdip));
7046                 i_mdi_pm_post_config_one(child);
7047                 return;
7048         }
7049 
7050         /* devi_config_common */
7051         ndi_devi_enter(vdip, &circ);
7052         cdip = ddi_get_child(vdip);
7053         while (cdip) {
7054                 dev_info_t *next = ddi_get_next_sibling(cdip);
7055 
7056                 i_mdi_pm_post_config_one(cdip);
7057                 cdip = next;
7058         }
7059         ndi_devi_exit(vdip, circ);
7060 }
7061 
7062 static void
7063 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7064 {
7065         mdi_client_t    *ct;
7066 
7067         ct = i_devi_get_client(child);
7068         if (ct == NULL)
7069                 return;
7070 
7071         MDI_CLIENT_LOCK(ct);
7072         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7073                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7074 
7075         if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7076                 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7077                 MDI_CLIENT_UNLOCK(ct);
7078                 return;
7079         }
7080 
7081         /* failure detaching or another thread just attached it */
7082         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7083             i_ddi_devi_attached(child)) ||
7084             (!i_ddi_devi_attached(child) &&
7085             !DEVI_IS_ATTACHING(child))) {
7086                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7087                 i_mdi_pm_reset_client(ct);
7088         } else {
7089                 mdi_pathinfo_t  *pip, *next;
7090                 int     valid_path_count = 0;
7091 
7092                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7093                 pip = ct->ct_path_head;
7094                 while (pip != NULL) {
7095                         MDI_PI_LOCK(pip);
7096                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7097                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7098                                 valid_path_count ++;
7099                         MDI_PI_UNLOCK(pip);
7100                         pip = next;
7101                 }
7102                 i_mdi_pm_rele_client(ct, valid_path_count);
7103                 ct->ct_powercnt_unconfig = 0;
7104         }
7105 
7106         MDI_CLIENT_UNLOCK(ct);
7107 }
7108 
7109 static void
7110 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7111 {
7112         int                     circ;
7113         dev_info_t              *cdip;
7114 
7115         ASSERT(MDI_VHCI(vdip));
7116 
7117         if (!held) {
7118                 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7119                 return;
7120         }
7121 
7122         if (child) {
7123                 ASSERT(DEVI_BUSY_OWNED(vdip));
7124                 i_mdi_pm_post_unconfig_one(child);
7125                 return;
7126         }
7127 
7128         ndi_devi_enter(vdip, &circ);
7129         cdip = ddi_get_child(vdip);
7130         while (cdip) {
7131                 dev_info_t *next = ddi_get_next_sibling(cdip);
7132 
7133                 i_mdi_pm_post_unconfig_one(cdip);
7134                 cdip = next;
7135         }
7136         ndi_devi_exit(vdip, circ);
7137 }
7138 
7139 int
7140 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7141 {
7142         int                     circ, ret = MDI_SUCCESS;
7143         dev_info_t              *client_dip = NULL;
7144         mdi_client_t            *ct;
7145 
7146         /*
7147          * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7148          * Power up pHCI for the named client device.
7149          * Note: Before the client is enumerated under vhci by phci,
7150          * client_dip can be NULL. Then proceed to power up all the
7151          * pHCIs.
7152          */
7153         if (devnm != NULL) {
7154                 ndi_devi_enter(vdip, &circ);
7155                 client_dip = ndi_devi_findchild(vdip, devnm);
7156         }
7157 
7158         MDI_DEBUG(4, (MDI_NOTE, vdip,
7159             "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7160 
7161         switch (op) {
7162         case MDI_PM_PRE_CONFIG:
7163                 ret = i_mdi_pm_pre_config(vdip, client_dip);
7164                 break;
7165 
7166         case MDI_PM_PRE_UNCONFIG:
7167                 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7168                     flags);
7169                 break;
7170 
7171         case MDI_PM_POST_CONFIG:
7172                 i_mdi_pm_post_config(vdip, client_dip);
7173                 break;
7174 
7175         case MDI_PM_POST_UNCONFIG:
7176                 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7177                 break;
7178 
7179         case MDI_PM_HOLD_POWER:
7180         case MDI_PM_RELE_POWER:
7181                 ASSERT(args);
7182 
7183                 client_dip = (dev_info_t *)args;
7184                 ASSERT(MDI_CLIENT(client_dip));
7185 
7186                 ct = i_devi_get_client(client_dip);
7187                 MDI_CLIENT_LOCK(ct);
7188 
7189                 if (op == MDI_PM_HOLD_POWER) {
7190                         if (ct->ct_power_cnt == 0) {
7191                                 (void) i_mdi_power_all_phci(ct);
7192                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7193                                     "i_mdi_pm_hold_client\n"));
7194                                 i_mdi_pm_hold_client(ct, ct->ct_path_count);
7195                         }
7196                 } else {
7197                         if (DEVI_IS_ATTACHING(client_dip)) {
7198                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7199                                     "i_mdi_pm_rele_client\n"));
7200                                 i_mdi_pm_rele_client(ct, ct->ct_path_count);
7201                         } else {
7202                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7203                                     "i_mdi_pm_reset_client\n"));
7204                                 i_mdi_pm_reset_client(ct);
7205                         }
7206                 }
7207 
7208                 MDI_CLIENT_UNLOCK(ct);
7209                 break;
7210 
7211         default:
7212                 break;
7213         }
7214 
7215         if (devnm)
7216                 ndi_devi_exit(vdip, circ);
7217 
7218         return (ret);
7219 }
7220 
7221 int
7222 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7223 {
7224         mdi_vhci_t *vhci;
7225 
7226         if (!MDI_VHCI(dip))
7227                 return (MDI_FAILURE);
7228 
7229         if (mdi_class) {
7230                 vhci = DEVI(dip)->devi_mdi_xhci;
7231                 ASSERT(vhci);
7232                 *mdi_class = vhci->vh_class;
7233         }
7234 
7235         return (MDI_SUCCESS);
7236 }
7237 
7238 int
7239 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7240 {
7241         mdi_phci_t *phci;
7242 
7243         if (!MDI_PHCI(dip))
7244                 return (MDI_FAILURE);
7245 
7246         if (mdi_class) {
7247                 phci = DEVI(dip)->devi_mdi_xhci;
7248                 ASSERT(phci);
7249                 *mdi_class = phci->ph_vhci->vh_class;
7250         }
7251 
7252         return (MDI_SUCCESS);
7253 }
7254 
7255 int
7256 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7257 {
7258         mdi_client_t *client;
7259 
7260         if (!MDI_CLIENT(dip))
7261                 return (MDI_FAILURE);
7262 
7263         if (mdi_class) {
7264                 client = DEVI(dip)->devi_mdi_client;
7265                 ASSERT(client);
7266                 *mdi_class = client->ct_vhci->vh_class;
7267         }
7268 
7269         return (MDI_SUCCESS);
7270 }
7271 
7272 void *
7273 mdi_client_get_vhci_private(dev_info_t *dip)
7274 {
7275         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7276         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7277                 mdi_client_t    *ct;
7278                 ct = i_devi_get_client(dip);
7279                 return (ct->ct_vprivate);
7280         }
7281         return (NULL);
7282 }
7283 
7284 void
7285 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7286 {
7287         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7288         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7289                 mdi_client_t    *ct;
7290                 ct = i_devi_get_client(dip);
7291                 ct->ct_vprivate = data;
7292         }
7293 }
7294 /*
7295  * mdi_pi_get_vhci_private():
7296  *              Get the vhci private information associated with the
7297  *              mdi_pathinfo node
7298  */
7299 void *
7300 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7301 {
7302         caddr_t vprivate = NULL;
7303         if (pip) {
7304                 vprivate = MDI_PI(pip)->pi_vprivate;
7305         }
7306         return (vprivate);
7307 }
7308 
7309 /*
7310  * mdi_pi_set_vhci_private():
7311  *              Set the vhci private information in the mdi_pathinfo node
7312  */
7313 void
7314 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7315 {
7316         if (pip) {
7317                 MDI_PI(pip)->pi_vprivate = priv;
7318         }
7319 }
7320 
7321 /*
7322  * mdi_phci_get_vhci_private():
7323  *              Get the vhci private information associated with the
7324  *              mdi_phci node
7325  */
7326 void *
7327 mdi_phci_get_vhci_private(dev_info_t *dip)
7328 {
7329         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7330         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7331                 mdi_phci_t      *ph;
7332                 ph = i_devi_get_phci(dip);
7333                 return (ph->ph_vprivate);
7334         }
7335         return (NULL);
7336 }
7337 
7338 /*
7339  * mdi_phci_set_vhci_private():
7340  *              Set the vhci private information in the mdi_phci node
7341  */
7342 void
7343 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7344 {
7345         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7346         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7347                 mdi_phci_t      *ph;
7348                 ph = i_devi_get_phci(dip);
7349                 ph->ph_vprivate = priv;
7350         }
7351 }
7352 
7353 int
7354 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7355 {
7356         return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7357 }
7358 
7359 int
7360 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7361 {
7362         return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7363 }
7364 
7365 /* Return 1 if all client paths are device_removed */
7366 static int
7367 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7368 {
7369         mdi_pathinfo_t  *pip;
7370         int             all_devices_removed = 1;
7371 
7372         MDI_CLIENT_LOCK(ct);
7373         for (pip = ct->ct_path_head; pip;
7374             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7375                 if (!mdi_pi_device_isremoved(pip)) {
7376                         all_devices_removed = 0;
7377                         break;
7378                 }
7379         }
7380         MDI_CLIENT_UNLOCK(ct);
7381         return (all_devices_removed);
7382 }
7383 
7384 /*
7385  * When processing path hotunplug, represent device removal.
7386  */
7387 int
7388 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7389 {
7390         mdi_client_t    *ct;
7391 
7392         MDI_PI_LOCK(pip);
7393         if (mdi_pi_device_isremoved(pip)) {
7394                 MDI_PI_UNLOCK(pip);
7395                 return (0);
7396         }
7397         MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7398         MDI_PI_FLAGS_SET_HIDDEN(pip);
7399         MDI_PI_UNLOCK(pip);
7400 
7401         /*
7402          * If all paths associated with the client are now DEVICE_REMOVED,
7403          * reflect DEVICE_REMOVED in the client.
7404          */
7405         ct = MDI_PI(pip)->pi_client;
7406         if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7407                 (void) ndi_devi_device_remove(ct->ct_dip);
7408         else
7409                 i_ddi_di_cache_invalidate();
7410 
7411         return (1);
7412 }
7413 
7414 /*
7415  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7416  * is now accessible then this interfaces is used to represent device insertion.
7417  */
7418 int
7419 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7420 {
7421         MDI_PI_LOCK(pip);
7422         if (!mdi_pi_device_isremoved(pip)) {
7423                 MDI_PI_UNLOCK(pip);
7424                 return (0);
7425         }
7426         MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7427         MDI_PI_FLAGS_CLR_HIDDEN(pip);
7428         MDI_PI_UNLOCK(pip);
7429 
7430         i_ddi_di_cache_invalidate();
7431 
7432         return (1);
7433 }
7434 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list[] */
#define N_VHCI_CLASSES  (sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * Entry i belongs to the class named by vhci_class_list[i].
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/*
 * nvpair names in vhci cache nvlist
 * (MDI_VHCI_CACHE_VERSION is a version number rather than a name;
 * presumably the value kept under MDI_NVPNAME_VERSION -- confirm against
 * the cache read/write code.)
 */
#define MDI_VHCI_CACHE_VERSION  1
#define MDI_NVPNAME_VERSION     "version"
#define MDI_NVPNAME_PHCIS       "phcis"
#define MDI_NVPNAME_CTADDRMAP   "clientaddrmap"
7455 
7456 /*
7457  * Given vhci class name, return its on-disk vhci cache filename.
7458  * Memory for the returned filename which includes the full path is allocated
7459  * by this function.
7460  */
7461 static char *
7462 vhclass2vhcache_filename(char *vhclass)
7463 {
7464         char *filename;
7465         int len;
7466         static char *fmt = "/etc/devices/mdi_%s_cache";
7467 
7468         /*
7469          * fmt contains the on-disk vhci cache file name format;
7470          * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7471          */
7472 
7473         /* the -1 below is to account for "%s" in the format string */
7474         len = strlen(fmt) + strlen(vhclass) - 1;
7475         filename = kmem_alloc(len, KM_SLEEP);
7476         (void) snprintf(filename, len, fmt, vhclass);
7477         ASSERT(len == (strlen(filename) + 1));
7478         return (filename);
7479 }
7480 
7481 /*
7482  * initialize the vhci cache related data structures and read the on-disk
7483  * vhci cached data into memory.
7484  */
7485 static void
7486 setup_vhci_cache(mdi_vhci_t *vh)
7487 {
7488         mdi_vhci_config_t *vhc;
7489         mdi_vhci_cache_t *vhcache;
7490         int i;
7491         nvlist_t *nvl = NULL;
7492 
7493         vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7494         vh->vh_config = vhc;
7495         vhcache = &vhc->vhc_vhcache;
7496 
7497         vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7498 
7499         mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7500         cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7501 
7502         rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7503 
7504         /*
7505          * Create string hash; same as mod_hash_create_strhash() except that
7506          * we use NULL key destructor.
7507          */
7508         vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7509             mdi_bus_config_cache_hash_size,
7510             mod_hash_null_keydtor, mod_hash_null_valdtor,
7511             mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7512 
7513         /*
7514          * The on-disk vhci cache is read during booting prior to the
7515          * lights-out period by mdi_read_devices_files().
7516          */
7517         for (i = 0; i < N_VHCI_CLASSES; i++) {
7518                 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7519                         nvl = vhcache_nvl[i];
7520                         vhcache_nvl[i] = NULL;
7521                         break;
7522                 }
7523         }
7524 
7525         /*
7526          * this is to cover the case of some one manually causing unloading
7527          * (or detaching) and reloading (or attaching) of a vhci driver.
7528          */
7529         if (nvl == NULL && modrootloaded)
7530                 nvl = read_on_disk_vhci_cache(vh->vh_class);
7531 
7532         if (nvl != NULL) {
7533                 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7534                 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7535                         vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7536                 else  {
7537                         cmn_err(CE_WARN,
7538                             "%s: data file corrupted, will recreate",
7539                             vhc->vhc_vhcache_filename);
7540                 }
7541                 rw_exit(&vhcache->vhcache_lock);
7542                 nvlist_free(nvl);
7543         }
7544 
7545         vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7546             CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7547 
7548         vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7549         vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7550 }
7551 
7552 /*
7553  * free all vhci cache related resources
7554  */
7555 static int
7556 destroy_vhci_cache(mdi_vhci_t *vh)
7557 {
7558         mdi_vhci_config_t *vhc = vh->vh_config;
7559         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7560         mdi_vhcache_phci_t *cphci, *cphci_next;
7561         mdi_vhcache_client_t *cct, *cct_next;
7562         mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7563 
7564         if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7565                 return (MDI_FAILURE);
7566 
7567         kmem_free(vhc->vhc_vhcache_filename,
7568             strlen(vhc->vhc_vhcache_filename) + 1);
7569 
7570         mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7571 
7572         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7573             cphci = cphci_next) {
7574                 cphci_next = cphci->cphci_next;
7575                 free_vhcache_phci(cphci);
7576         }
7577 
7578         for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7579                 cct_next = cct->cct_next;
7580                 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7581                         cpi_next = cpi->cpi_next;
7582                         free_vhcache_pathinfo(cpi);
7583                 }
7584                 free_vhcache_client(cct);
7585         }
7586 
7587         rw_destroy(&vhcache->vhcache_lock);
7588 
7589         mutex_destroy(&vhc->vhc_lock);
7590         cv_destroy(&vhc->vhc_cv);
7591         kmem_free(vhc, sizeof (mdi_vhci_config_t));
7592         return (MDI_SUCCESS);
7593 }
7594 
7595 /*
7596  * Stop all vhci cache related async threads and free their resources.
7597  */
7598 static int
7599 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7600 {
7601         mdi_async_client_config_t *acc, *acc_next;
7602 
7603         mutex_enter(&vhc->vhc_lock);
7604         vhc->vhc_flags |= MDI_VHC_EXIT;
7605         ASSERT(vhc->vhc_acc_thrcount >= 0);
7606         cv_broadcast(&vhc->vhc_cv);
7607 
7608         while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7609             vhc->vhc_acc_thrcount != 0) {
7610                 mutex_exit(&vhc->vhc_lock);
7611                 delay_random(mdi_delay);
7612                 mutex_enter(&vhc->vhc_lock);
7613         }
7614 
7615         vhc->vhc_flags &= ~MDI_VHC_EXIT;
7616 
7617         for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7618                 acc_next = acc->acc_next;
7619                 free_async_client_config(acc);
7620         }
7621         vhc->vhc_acc_list_head = NULL;
7622         vhc->vhc_acc_list_tail = NULL;
7623         vhc->vhc_acc_count = 0;
7624 
7625         if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7626                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7627                 mutex_exit(&vhc->vhc_lock);
7628                 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7629                         vhcache_dirty(vhc);
7630                         return (MDI_FAILURE);
7631                 }
7632         } else
7633                 mutex_exit(&vhc->vhc_lock);
7634 
7635         if (callb_delete(vhc->vhc_cbid) != 0)
7636                 return (MDI_FAILURE);
7637 
7638         return (MDI_SUCCESS);
7639 }
7640 
7641 /*
7642  * Stop vhci cache flush thread
7643  */
7644 /* ARGSUSED */
7645 static boolean_t
7646 stop_vhcache_flush_thread(void *arg, int code)
7647 {
7648         mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7649 
7650         mutex_enter(&vhc->vhc_lock);
7651         vhc->vhc_flags |= MDI_VHC_EXIT;
7652         cv_broadcast(&vhc->vhc_cv);
7653 
7654         while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7655                 mutex_exit(&vhc->vhc_lock);
7656                 delay_random(mdi_delay);
7657                 mutex_enter(&vhc->vhc_lock);
7658         }
7659 
7660         if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7661                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7662                 mutex_exit(&vhc->vhc_lock);
7663                 (void) flush_vhcache(vhc, 1);
7664         } else
7665                 mutex_exit(&vhc->vhc_lock);
7666 
7667         return (B_TRUE);
7668 }
7669 
7670 /*
7671  * Enqueue the vhcache phci (cphci) at the tail of the list
7672  */
7673 static void
7674 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7675 {
7676         cphci->cphci_next = NULL;
7677         if (vhcache->vhcache_phci_head == NULL)
7678                 vhcache->vhcache_phci_head = cphci;
7679         else
7680                 vhcache->vhcache_phci_tail->cphci_next = cphci;
7681         vhcache->vhcache_phci_tail = cphci;
7682 }
7683 
7684 /*
7685  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7686  */
7687 static void
7688 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7689     mdi_vhcache_pathinfo_t *cpi)
7690 {
7691         cpi->cpi_next = NULL;
7692         if (cct->cct_cpi_head == NULL)
7693                 cct->cct_cpi_head = cpi;
7694         else
7695                 cct->cct_cpi_tail->cpi_next = cpi;
7696         cct->cct_cpi_tail = cpi;
7697 }
7698 
7699 /*
7700  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7701  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7702  * flag set come at the beginning of the list. All cpis which have this
7703  * flag set come at the end of the list.
7704  */
7705 static void
7706 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7707     mdi_vhcache_pathinfo_t *newcpi)
7708 {
7709         mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7710 
7711         if (cct->cct_cpi_head == NULL ||
7712             (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7713                 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7714         else {
7715                 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7716                     !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7717                     prev_cpi = cpi, cpi = cpi->cpi_next)
7718                         ;
7719 
7720                 if (prev_cpi == NULL)
7721                         cct->cct_cpi_head = newcpi;
7722                 else
7723                         prev_cpi->cpi_next = newcpi;
7724 
7725                 newcpi->cpi_next = cpi;
7726 
7727                 if (cpi == NULL)
7728                         cct->cct_cpi_tail = newcpi;
7729         }
7730 }
7731 
7732 /*
7733  * Enqueue the vhcache client (cct) at the tail of the list
7734  */
7735 static void
7736 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7737     mdi_vhcache_client_t *cct)
7738 {
7739         cct->cct_next = NULL;
7740         if (vhcache->vhcache_client_head == NULL)
7741                 vhcache->vhcache_client_head = cct;
7742         else
7743                 vhcache->vhcache_client_tail->cct_next = cct;
7744         vhcache->vhcache_client_tail = cct;
7745 }
7746 
/*
 * Free an array of nelem strings together with the array itself.
 * Each non-NULL element is freed as a strlen() + 1 byte allocation.
 * A NULL array pointer is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
        int idx;

        if (str == NULL)
                return;

        for (idx = 0; idx < nelem; idx++) {
                if (str[idx] == NULL)
                        continue;
                kmem_free(str[idx], strlen(str[idx]) + 1);
        }

        kmem_free(str, sizeof (char *) * nelem);
}
7760 
7761 static void
7762 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7763 {
7764         kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7765         kmem_free(cphci, sizeof (*cphci));
7766 }
7767 
7768 static void
7769 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7770 {
7771         kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7772         kmem_free(cpi, sizeof (*cpi));
7773 }
7774 
7775 static void
7776 free_vhcache_client(mdi_vhcache_client_t *cct)
7777 {
7778         kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7779         kmem_free(cct, sizeof (*cct));
7780 }
7781 
7782 static char *
7783 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7784 {
7785         char *name_addr;
7786         int len;
7787 
7788         len = strlen(ct_name) + strlen(ct_addr) + 2;
7789         name_addr = kmem_alloc(len, KM_SLEEP);
7790         (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7791 
7792         if (ret_len)
7793                 *ret_len = len;
7794         return (name_addr);
7795 }
7796 
7797 /*
7798  * Copy the contents of paddrnvl to vhci cache.
7799  * paddrnvl nvlist contains path information for a vhci client.
7800  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7801  */
7802 static void
7803 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7804     mdi_vhcache_client_t *cct)
7805 {
7806         nvpair_t *nvp = NULL;
7807         mdi_vhcache_pathinfo_t *cpi;
7808         uint_t nelem;
7809         uint32_t *val;
7810 
7811         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7812                 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7813                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7814                 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7815                 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7816                 ASSERT(nelem == 2);
7817                 cpi->cpi_cphci = cphci_list[val[0]];
7818                 cpi->cpi_flags = val[1];
7819                 enqueue_tail_vhcache_pathinfo(cct, cpi);
7820         }
7821 }
7822 
7823 /*
7824  * Copy the contents of caddrmapnvl to vhci cache.
7825  * caddrmapnvl nvlist contains vhci client address to phci client address
7826  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7827  * this nvlist.
7828  */
7829 static void
7830 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7831     mdi_vhcache_phci_t *cphci_list[])
7832 {
7833         nvpair_t *nvp = NULL;
7834         nvlist_t *paddrnvl;
7835         mdi_vhcache_client_t *cct;
7836 
7837         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7838                 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7839                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7840                 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7841                 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7842                 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7843                 /* the client must contain at least one path */
7844                 ASSERT(cct->cct_cpi_head != NULL);
7845 
7846                 enqueue_vhcache_client(vhcache, cct);
7847                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7848                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7849         }
7850 }
7851 
7852 /*
7853  * Copy the contents of the main nvlist to vhci cache.
7854  *
7855  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7856  * The nvlist contains the mappings between the vhci client addresses and
7857  * their corresponding phci client addresses.
7858  *
7859  * The structure of the nvlist is as follows:
7860  *
7861  * Main nvlist:
7862  *      NAME            TYPE            DATA
7863  *      version         int32           version number
7864  *      phcis           string array    array of phci paths
7865  *      clientaddrmap   nvlist_t        c2paddrs_nvl (see below)
7866  *
7867  * structure of c2paddrs_nvl:
7868  *      NAME            TYPE            DATA
7869  *      caddr1          nvlist_t        paddrs_nvl1
7870  *      caddr2          nvlist_t        paddrs_nvl2
7871  *      ...
7872  * where caddr1, caddr2, ... are vhci client name and addresses in the
7873  * form of "<clientname>@<clientaddress>".
7874  * (for example: "ssd@2000002037cd9f72");
7875  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7876  *
7877  * structure of paddrs_nvl:
7878  *      NAME            TYPE            DATA
7879  *      pi_addr1        uint32_array    (phci-id, cpi_flags)
7880  *      pi_addr2        uint32_array    (phci-id, cpi_flags)
7881  *      ...
7882  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7883  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7884  * phci-ids are integers that identify pHCIs to which the
7885  * the bus specific address belongs to. These integers are used as an index
7886  * into to the phcis string array in the main nvlist to get the pHCI path.
7887  */
7888 static int
7889 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7890 {
7891         char **phcis, **phci_namep;
7892         uint_t nphcis;
7893         mdi_vhcache_phci_t *cphci, **cphci_list;
7894         nvlist_t *caddrmapnvl;
7895         int32_t ver;
7896         int i;
7897         size_t cphci_list_size;
7898 
7899         ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7900 
7901         if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7902             ver != MDI_VHCI_CACHE_VERSION)
7903                 return (MDI_FAILURE);
7904 
7905         if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7906             &nphcis) != 0)
7907                 return (MDI_SUCCESS);
7908 
7909         ASSERT(nphcis > 0);
7910 
7911         cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7912         cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7913         for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7914                 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7915                 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7916                 enqueue_vhcache_phci(vhcache, cphci);
7917                 cphci_list[i] = cphci;
7918         }
7919 
7920         ASSERT(vhcache->vhcache_phci_head != NULL);
7921 
7922         if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7923                 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7924 
7925         kmem_free(cphci_list, cphci_list_size);
7926         return (MDI_SUCCESS);
7927 }
7928 
7929 /*
7930  * Build paddrnvl for the specified client using the information in the
7931  * vhci cache and add it to the caddrmapnnvl.
7932  * Returns 0 on success, errno on failure.
7933  */
7934 static int
7935 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7936     nvlist_t *caddrmapnvl)
7937 {
7938         mdi_vhcache_pathinfo_t *cpi;
7939         nvlist_t *nvl;
7940         int err;
7941         uint32_t val[2];
7942 
7943         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7944 
7945         if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7946                 return (err);
7947 
7948         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7949                 val[0] = cpi->cpi_cphci->cphci_id;
7950                 val[1] = cpi->cpi_flags;
7951                 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7952                     != 0)
7953                         goto out;
7954         }
7955 
7956         err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7957 out:
7958         nvlist_free(nvl);
7959         return (err);
7960 }
7961 
7962 /*
7963  * Build caddrmapnvl using the information in the vhci cache
7964  * and add it to the mainnvl.
7965  * Returns 0 on success, errno on failure.
7966  */
7967 static int
7968 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7969 {
7970         mdi_vhcache_client_t *cct;
7971         nvlist_t *nvl;
7972         int err;
7973 
7974         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7975 
7976         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7977                 return (err);
7978 
7979         for (cct = vhcache->vhcache_client_head; cct != NULL;
7980             cct = cct->cct_next) {
7981                 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7982                         goto out;
7983         }
7984 
7985         err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7986 out:
7987         nvlist_free(nvl);
7988         return (err);
7989 }
7990 
7991 /*
7992  * Build nvlist using the information in the vhci cache.
7993  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7994  * Returns nvl on success, NULL on failure.
7995  */
7996 static nvlist_t *
7997 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7998 {
7999         mdi_vhcache_phci_t *cphci;
8000         uint_t phci_count;
8001         char **phcis;
8002         nvlist_t *nvl;
8003         int err, i;
8004 
8005         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
8006                 nvl = NULL;
8007                 goto out;
8008         }
8009 
8010         if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8011             MDI_VHCI_CACHE_VERSION)) != 0)
8012                 goto out;
8013 
8014         rw_enter(&vhcache->vhcache_lock, RW_READER);
8015         if (vhcache->vhcache_phci_head == NULL) {
8016                 rw_exit(&vhcache->vhcache_lock);
8017                 return (nvl);
8018         }
8019 
8020         phci_count = 0;
8021         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8022             cphci = cphci->cphci_next)
8023                 cphci->cphci_id = phci_count++;
8024 
8025         /* build phci pathname list */
8026         phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8027         for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8028             cphci = cphci->cphci_next, i++)
8029                 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8030 
8031         err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8032             phci_count);
8033         free_string_array(phcis, phci_count);
8034 
8035         if (err == 0 &&
8036             (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8037                 rw_exit(&vhcache->vhcache_lock);
8038                 return (nvl);
8039         }
8040 
8041         rw_exit(&vhcache->vhcache_lock);
8042 out:
8043         if (nvl)
8044                 nvlist_free(nvl);
8045         return (NULL);
8046 }
8047 
8048 /*
8049  * Lookup vhcache phci structure for the specified phci path.
8050  */
8051 static mdi_vhcache_phci_t *
8052 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8053 {
8054         mdi_vhcache_phci_t *cphci;
8055 
8056         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8057 
8058         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8059             cphci = cphci->cphci_next) {
8060                 if (strcmp(cphci->cphci_path, phci_path) == 0)
8061                         return (cphci);
8062         }
8063 
8064         return (NULL);
8065 }
8066 
8067 /*
8068  * Lookup vhcache phci structure for the specified phci.
8069  */
8070 static mdi_vhcache_phci_t *
8071 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8072 {
8073         mdi_vhcache_phci_t *cphci;
8074 
8075         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8076 
8077         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8078             cphci = cphci->cphci_next) {
8079                 if (cphci->cphci_phci == ph)
8080                         return (cphci);
8081         }
8082 
8083         return (NULL);
8084 }
8085 
8086 /*
8087  * Add the specified phci to the vhci cache if not already present.
8088  */
8089 static void
8090 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8091 {
8092         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8093         mdi_vhcache_phci_t *cphci;
8094         char *pathname;
8095         int cache_updated;
8096 
8097         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8098 
8099         pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8100         (void) ddi_pathname(ph->ph_dip, pathname);
8101         if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8102             != NULL) {
8103                 cphci->cphci_phci = ph;
8104                 cache_updated = 0;
8105         } else {
8106                 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8107                 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8108                 cphci->cphci_phci = ph;
8109                 enqueue_vhcache_phci(vhcache, cphci);
8110                 cache_updated = 1;
8111         }
8112 
8113         rw_exit(&vhcache->vhcache_lock);
8114 
8115         /*
8116          * Since a new phci has been added, reset
8117          * vhc_path_discovery_cutoff_time to allow for discovery of paths
8118          * during next vhcache_discover_paths().
8119          */
8120         mutex_enter(&vhc->vhc_lock);
8121         vhc->vhc_path_discovery_cutoff_time = 0;
8122         mutex_exit(&vhc->vhc_lock);
8123 
8124         kmem_free(pathname, MAXPATHLEN);
8125         if (cache_updated)
8126                 vhcache_dirty(vhc);
8127 }
8128 
8129 /*
8130  * Remove the reference to the specified phci from the vhci cache.
8131  */
8132 static void
8133 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8134 {
8135         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8136         mdi_vhcache_phci_t *cphci;
8137 
8138         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8139         if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8140                 /* do not remove the actual mdi_vhcache_phci structure */
8141                 cphci->cphci_phci = NULL;
8142         }
8143         rw_exit(&vhcache->vhcache_lock);
8144 }
8145 
8146 static void
8147 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8148     mdi_vhcache_lookup_token_t *src)
8149 {
8150         if (src == NULL) {
8151                 dst->lt_cct = NULL;
8152                 dst->lt_cct_lookup_time = 0;
8153         } else {
8154                 dst->lt_cct = src->lt_cct;
8155                 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8156         }
8157 }
8158 
8159 /*
8160  * Look up vhcache client for the specified client.
8161  */
8162 static mdi_vhcache_client_t *
8163 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8164     mdi_vhcache_lookup_token_t *token)
8165 {
8166         mod_hash_val_t hv;
8167         char *name_addr;
8168         int len;
8169 
8170         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8171 
8172         /*
8173          * If no vhcache clean occurred since the last lookup, we can
8174          * simply return the cct from the last lookup operation.
8175          * It works because ccts are never freed except during the vhcache
8176          * cleanup operation.
8177          */
8178         if (token != NULL &&
8179             vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8180                 return (token->lt_cct);
8181 
8182         name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8183         if (mod_hash_find(vhcache->vhcache_client_hash,
8184             (mod_hash_key_t)name_addr, &hv) == 0) {
8185                 if (token) {
8186                         token->lt_cct = (mdi_vhcache_client_t *)hv;
8187                         token->lt_cct_lookup_time = ddi_get_lbolt64();
8188                 }
8189         } else {
8190                 if (token) {
8191                         token->lt_cct = NULL;
8192                         token->lt_cct_lookup_time = 0;
8193                 }
8194                 hv = NULL;
8195         }
8196         kmem_free(name_addr, len);
8197         return ((mdi_vhcache_client_t *)hv);
8198 }
8199 
8200 /*
8201  * Add the specified path to the vhci cache if not already present.
8202  * Also add the vhcache client for the client corresponding to this path
8203  * if it doesn't already exist.
8204  */
8205 static void
8206 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8207 {
8208         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8209         mdi_vhcache_client_t *cct;
8210         mdi_vhcache_pathinfo_t *cpi;
8211         mdi_phci_t *ph = pip->pi_phci;
8212         mdi_client_t *ct = pip->pi_client;
8213         int cache_updated = 0;
8214 
8215         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8216 
8217         /* if vhcache client for this pip doesn't already exist, add it */
8218         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8219             NULL)) == NULL) {
8220                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8221                 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8222                     ct->ct_guid, NULL);
8223                 enqueue_vhcache_client(vhcache, cct);
8224                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
8225                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8226                 cache_updated = 1;
8227         }
8228 
8229         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8230                 if (cpi->cpi_cphci->cphci_phci == ph &&
8231                     strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8232                         cpi->cpi_pip = pip;
8233                         if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8234                                 cpi->cpi_flags &=
8235                                     ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8236                                 sort_vhcache_paths(cct);
8237                                 cache_updated = 1;
8238                         }
8239                         break;
8240                 }
8241         }
8242 
8243         if (cpi == NULL) {
8244                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8245                 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8246                 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8247                 ASSERT(cpi->cpi_cphci != NULL);
8248                 cpi->cpi_pip = pip;
8249                 enqueue_vhcache_pathinfo(cct, cpi);
8250                 cache_updated = 1;
8251         }
8252 
8253         rw_exit(&vhcache->vhcache_lock);
8254 
8255         if (cache_updated)
8256                 vhcache_dirty(vhc);
8257 }
8258 
8259 /*
8260  * Remove the reference to the specified path from the vhci cache.
8261  */
8262 static void
8263 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8264 {
8265         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8266         mdi_client_t *ct = pip->pi_client;
8267         mdi_vhcache_client_t *cct;
8268         mdi_vhcache_pathinfo_t *cpi;
8269 
8270         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8271         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8272             NULL)) != NULL) {
8273                 for (cpi = cct->cct_cpi_head; cpi != NULL;
8274                     cpi = cpi->cpi_next) {
8275                         if (cpi->cpi_pip == pip) {
8276                                 cpi->cpi_pip = NULL;
8277                                 break;
8278                         }
8279                 }
8280         }
8281         rw_exit(&vhcache->vhcache_lock);
8282 }
8283 
8284 /*
8285  * Flush the vhci cache to disk.
8286  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8287  */
8288 static int
8289 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8290 {
8291         nvlist_t *nvl;
8292         int err;
8293         int rv;
8294 
8295         /*
8296          * It is possible that the system may shutdown before
8297          * i_ddi_io_initialized (during stmsboot for example). To allow for
8298          * flushing the cache in this case do not check for
8299          * i_ddi_io_initialized when force flag is set.
8300          */
8301         if (force_flag == 0 && !i_ddi_io_initialized())
8302                 return (MDI_FAILURE);
8303 
8304         if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8305                 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8306                 nvlist_free(nvl);
8307         } else
8308                 err = EFAULT;
8309 
8310         rv = MDI_SUCCESS;
8311         mutex_enter(&vhc->vhc_lock);
8312         if (err != 0) {
8313                 if (err == EROFS) {
8314                         vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8315                         vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8316                             MDI_VHC_VHCACHE_DIRTY);
8317                 } else {
8318                         if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8319                                 cmn_err(CE_CONT, "%s: update failed\n",
8320                                     vhc->vhc_vhcache_filename);
8321                                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8322                         }
8323                         rv = MDI_FAILURE;
8324                 }
8325         } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8326                 cmn_err(CE_CONT,
8327                     "%s: update now ok\n", vhc->vhc_vhcache_filename);
8328                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8329         }
8330         mutex_exit(&vhc->vhc_lock);
8331 
8332         return (rv);
8333 }
8334 
/*
 * Body of the vhci cache flush thread.
 *
 * arg is the mdi_vhci_config_t whose cache this thread services.
 *
 * While the cache is marked MDI_VHC_VHCACHE_DIRTY the thread waits until
 * vhc_flush_at_ticks and then calls flush_vhcache(); when that flush does not
 * return MDI_SUCCESS, vhcache_dirty() is called. Once the cache is clean the
 * thread idles for up to mdi_vhcache_flush_daemon_idle_time seconds and
 * leaves if MDI_VHC_EXIT is set or the cache is still clean at that point.
 * MDI_VHC_VHCACHE_FLUSH_THREAD is cleared on the way out.
 */
static void
vhcache_flush_thread(void *arg)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
        clock_t idle_time, quit_at_ticks;
        callb_cpr_t cprinfo;

        /* number of ticks to sleep idle before exiting */
        idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

        CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
            "mdi_vhcache_flush");
        mutex_enter(&vhc->vhc_lock);
        for (; ; ) {
                /* flush phase: entered and left with vhc_lock held */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
                        if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
                                /* not due yet: sleep until the flush time */
                                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                                (void) cv_timedwait(&vhc->vhc_cv,
                                    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
                                CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                        } else {
                                /*
                                 * The dirty flag is cleared before the lock
                                 * is dropped; flush_vhcache() and
                                 * vhcache_dirty() take vhc_lock themselves.
                                 */
                                vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
                                mutex_exit(&vhc->vhc_lock);

                                if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
                                        vhcache_dirty(vhc);

                                mutex_enter(&vhc->vhc_lock);
                        }
                }

                /* idle phase: wait for more work, an exit request or timeout */
                quit_at_ticks = ddi_get_lbolt() + idle_time;

                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
                    ddi_get_lbolt() < quit_at_ticks) {
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                            quit_at_ticks);
                        CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                }

                /* keep looping only if the cache became dirty again */
                if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
                        goto out;
        }

out:
        /* still holding vhc_lock here */
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
        /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
        CALLB_CPR_EXIT(&cprinfo);
}
8392 
8393 /*
8394  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8395  */
8396 static void
8397 vhcache_dirty(mdi_vhci_config_t *vhc)
8398 {
8399         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8400         int create_thread;
8401 
8402         rw_enter(&vhcache->vhcache_lock, RW_READER);
8403         /* do not flush cache until the cache is fully built */
8404         if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8405                 rw_exit(&vhcache->vhcache_lock);
8406                 return;
8407         }
8408         rw_exit(&vhcache->vhcache_lock);
8409 
8410         mutex_enter(&vhc->vhc_lock);
8411         if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8412                 mutex_exit(&vhc->vhc_lock);
8413                 return;
8414         }
8415 
8416         vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8417         vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8418             mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8419         if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8420                 cv_broadcast(&vhc->vhc_cv);
8421                 create_thread = 0;
8422         } else {
8423                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8424                 create_thread = 1;
8425         }
8426         mutex_exit(&vhc->vhc_lock);
8427 
8428         if (create_thread)
8429                 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8430                     0, &p0, TS_RUN, minclsyspri);
8431 }
8432 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
        char *phbc_phci_path;           /* path of the phci to configure */
        struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
        struct mdi_phci_bus_config_s *phbc_next;        /* next in list */
} mdi_phci_bus_config_t;
8442 
/*
 * vhci bus config structure - one for each vhci bus config operation.
 * It is shared by all the phci bus config structures that point at it
 * through phbc_vhbusconfig.
 */
typedef struct mdi_vhci_bus_config_s {
        ddi_bus_config_op_t vhbc_op;    /* bus config op */
        major_t vhbc_op_major;          /* bus config op major */
        uint_t vhbc_op_flags;           /* bus config op flags */
        kmutex_t vhbc_lock;             /* held while workers update count */
        kcondvar_t vhbc_cv;             /* broadcast when count reaches 0 */
        int vhbc_thr_count;             /* phci operations not yet finished */
} mdi_vhci_bus_config_t;
8452 
8453 /*
8454  * bus config the specified phci
8455  */
8456 static void
8457 bus_config_phci(void *arg)
8458 {
8459         mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8460         mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8461         dev_info_t *ph_dip;
8462 
8463         /*
8464          * first configure all path components upto phci and then configure
8465          * the phci children.
8466          */
8467         if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8468             != NULL) {
8469                 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8470                     vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8471                         (void) ndi_devi_config_driver(ph_dip,
8472                             vhbc->vhbc_op_flags,
8473                             vhbc->vhbc_op_major);
8474                 } else
8475                         (void) ndi_devi_config(ph_dip,
8476                             vhbc->vhbc_op_flags);
8477 
8478                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8479                 ndi_rele_devi(ph_dip);
8480         }
8481 
8482         kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8483         kmem_free(phbc, sizeof (*phbc));
8484 
8485         mutex_enter(&vhbc->vhbc_lock);
8486         vhbc->vhbc_thr_count--;
8487         if (vhbc->vhbc_thr_count == 0)
8488                 cv_broadcast(&vhbc->vhbc_cv);
8489         mutex_exit(&vhbc->vhbc_lock);
8490 }
8491 
/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 *
 * vhcache - vhci cache whose phci list is walked
 * flags   - only NDI_CONFIG_REPROBE and NDI_DRV_CONF_REPROBE are passed on;
 *           NDI_NO_EVENT is always added
 * op, maj - bus config operation and major number handed to each worker
 *
 * One bus_config_phci() worker is started per phci (called directly instead
 * when mdi_mtc_off is set). The function returns only after every worker
 * has finished. Nothing is done when the cache lists no phci.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
        mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
        mdi_vhci_bus_config_t *vhbc;
        mdi_vhcache_phci_t *cphci;

        rw_enter(&vhcache->vhcache_lock, RW_READER);
        if (vhcache->vhcache_phci_head == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

        /*
         * Build the list of work items while the cache lock is held. Each
         * item carries its own copy of the phci path.
         */
        for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
            cphci = cphci->cphci_next) {
                /* skip phcis that haven't attached before root is available */
                if (!modrootloaded && (cphci->cphci_phci == NULL))
                        continue;
                phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
                phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
                    KM_SLEEP);
                phbc->phbc_vhbusconfig = vhbc;
                phbc->phbc_next = phbc_head;
                phbc_head = phbc;
                vhbc->vhbc_thr_count++;
        }
        rw_exit(&vhcache->vhcache_lock);

        vhbc->vhbc_op = op;
        vhbc->vhbc_op_major = maj;
        vhbc->vhbc_op_flags = NDI_NO_EVENT |
            (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
        mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

        /* now create threads to initiate bus config on all phcis in parallel */
        for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
                /* bus_config_phci() frees phbc, so fetch the link first */
                phbc_next = phbc->phbc_next;
                if (mdi_mtc_off)
                        bus_config_phci((void *)phbc);
                else
                        (void) thread_create(NULL, 0, bus_config_phci, phbc,
                            0, &p0, TS_RUN, minclsyspri);
        }

        mutex_enter(&vhbc->vhbc_lock);
        /* wait until all threads exit */
        while (vhbc->vhbc_thr_count > 0)
                cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
        mutex_exit(&vhbc->vhbc_lock);

        /* every worker is done with vhbc: tear it down */
        mutex_destroy(&vhbc->vhbc_lock);
        cv_destroy(&vhbc->vhbc_cv);
        kmem_free(vhbc, sizeof (*vhbc));
}
8554 
8555 /*
8556  * Single threaded version of bus_config_all_phcis()
8557  */
8558 static void
8559 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8560     ddi_bus_config_op_t op, major_t maj)
8561 {
8562         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8563 
8564         single_threaded_vhconfig_enter(vhc);
8565         bus_config_all_phcis(vhcache, flags, op, maj);
8566         single_threaded_vhconfig_exit(vhc);
8567 }
8568 
8569 /*
8570  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8571  * The path includes the child component in addition to the phci path.
8572  */
8573 static int
8574 bus_config_one_phci_child(char *path)
8575 {
8576         dev_info_t *ph_dip, *child;
8577         char *devnm;
8578         int rv = MDI_FAILURE;
8579 
8580         /* extract the child component of the phci */
8581         devnm = strrchr(path, '/');
8582         *devnm++ = '\0';
8583 
8584         /*
8585          * first configure all path components upto phci and then
8586          * configure the phci child.
8587          */
8588         if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8589                 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8590                     NDI_SUCCESS) {
8591                         /*
8592                          * release the hold that ndi_devi_config_one() placed
8593                          */
8594                         ndi_rele_devi(child);
8595                         rv = MDI_SUCCESS;
8596                 }
8597 
8598                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8599                 ndi_rele_devi(ph_dip);
8600         }
8601 
8602         devnm--;
8603         *devnm = '/';
8604         return (rv);
8605 }
8606 
8607 /*
8608  * Build a list of phci client paths for the specified vhci client.
8609  * The list includes only those phci client paths which aren't configured yet.
8610  */
8611 static mdi_phys_path_t *
8612 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8613 {
8614         mdi_vhcache_pathinfo_t *cpi;
8615         mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8616         int config_path, len;
8617 
8618         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8619                 /*
8620                  * include only those paths that aren't configured.
8621                  */
8622                 config_path = 0;
8623                 if (cpi->cpi_pip == NULL)
8624                         config_path = 1;
8625                 else {
8626                         MDI_PI_LOCK(cpi->cpi_pip);
8627                         if (MDI_PI_IS_INIT(cpi->cpi_pip))
8628                                 config_path = 1;
8629                         MDI_PI_UNLOCK(cpi->cpi_pip);
8630                 }
8631 
8632                 if (config_path) {
8633                         pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8634                         len = strlen(cpi->cpi_cphci->cphci_path) +
8635                             strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8636                         pp->phys_path = kmem_alloc(len, KM_SLEEP);
8637                         (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8638                             cpi->cpi_cphci->cphci_path, ct_name,
8639                             cpi->cpi_addr);
8640                         pp->phys_path_next = NULL;
8641 
8642                         if (pp_head == NULL)
8643                                 pp_head = pp;
8644                         else
8645                                 pp_tail->phys_path_next = pp;
8646                         pp_tail = pp;
8647                 }
8648         }
8649 
8650         return (pp_head);
8651 }
8652 
8653 /*
8654  * Free the memory allocated for phci client path list.
8655  */
8656 static void
8657 free_phclient_path_list(mdi_phys_path_t *pp_head)
8658 {
8659         mdi_phys_path_t *pp, *pp_next;
8660 
8661         for (pp = pp_head; pp != NULL; pp = pp_next) {
8662                 pp_next = pp->phys_path_next;
8663                 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8664                 kmem_free(pp, sizeof (*pp));
8665         }
8666 }
8667 
8668 /*
8669  * Allocated async client structure and initialize with the specified values.
8670  */
8671 static mdi_async_client_config_t *
8672 alloc_async_client_config(char *ct_name, char *ct_addr,
8673     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8674 {
8675         mdi_async_client_config_t *acc;
8676 
8677         acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8678         acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8679         acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8680         acc->acc_phclient_path_list_head = pp_head;
8681         init_vhcache_lookup_token(&acc->acc_token, tok);
8682         acc->acc_next = NULL;
8683         return (acc);
8684 }
8685 
8686 /*
8687  * Free the memory allocated for the async client structure and their members.
8688  */
8689 static void
8690 free_async_client_config(mdi_async_client_config_t *acc)
8691 {
8692         if (acc->acc_phclient_path_list_head)
8693                 free_phclient_path_list(acc->acc_phclient_path_list_head);
8694         kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8695         kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8696         kmem_free(acc, sizeof (*acc));
8697 }
8698 
8699 /*
8700  * Sort vhcache pathinfos (cpis) of the specified client.
8701  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8702  * flag set come at the beginning of the list. All cpis which have this
8703  * flag set come at the end of the list.
8704  */
8705 static void
8706 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8707 {
8708         mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8709 
8710         cpi_head = cct->cct_cpi_head;
8711         cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8712         for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8713                 cpi_next = cpi->cpi_next;
8714                 enqueue_vhcache_pathinfo(cct, cpi);
8715         }
8716 }
8717 
/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * The flag is considered correct when it is set exactly on those entries
 * that have no pathinfo attached (cpi_pip == NULL).
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 *
 * Nothing is done if the client cannot be found in the cache.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        mdi_vhcache_client_t *cct;
        mdi_vhcache_pathinfo_t *cpi;

        rw_enter(&vhcache->vhcache_lock, RW_READER);
        if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
            == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /*
         * to avoid unnecessary on-disk cache updates, first check if an
         * update is really needed. If no update is needed simply return.
         */
        for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
                if ((cpi->cpi_pip != NULL &&
                    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
                    (cpi->cpi_pip == NULL &&
                    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
                        break;
                }
        }
        /* the scan ran off the end: every flag is already correct */
        if (cpi == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /*
         * Writer access is needed from here on. If the in-place upgrade
         * fails the lock is released and re-taken as writer, and the client
         * is looked up again since the lock was dropped in between.
         */
        if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
                rw_exit(&vhcache->vhcache_lock);
                rw_enter(&vhcache->vhcache_lock, RW_WRITER);
                if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
                    tok)) == NULL) {
                        rw_exit(&vhcache->vhcache_lock);
                        return;
                }
        }

        /* bring every flag in line with the presence of a pathinfo */
        for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
                if (cpi->cpi_pip != NULL)
                        cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
                else
                        cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
        }
        sort_vhcache_paths(cct);

        /* schedule the flush only after the cache lock has been released */
        rw_exit(&vhcache->vhcache_lock);
        vhcache_dirty(vhc);
}
8780 
8781 /*
8782  * Configure all specified paths of the client.
8783  */
8784 static void
8785 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8786     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8787 {
8788         mdi_phys_path_t *pp;
8789 
8790         for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8791                 (void) bus_config_one_phci_child(pp->phys_path);
8792         adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8793 }
8794 
/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 *
 * arg is the mdi_vhci_config_t whose list is serviced. The thread leaves
 * when MDI_VHC_EXIT is set or when the list is still empty after waiting
 * up to mdi_async_config_idle_time seconds; vhc_acc_thrcount is decremented
 * on the way out.
 */
static void
config_client_paths_thread(void *arg)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
        mdi_async_client_config_t *acc;
        clock_t quit_at_ticks;
        clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
        callb_cpr_t cprinfo;

        CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
            "mdi_config_client_paths");

        for (; ; ) {
                /* the idle deadline restarts after every processed element */
                quit_at_ticks = ddi_get_lbolt() + idle_time;

                mutex_enter(&vhc->vhc_lock);
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    vhc->vhc_acc_list_head == NULL &&
                    ddi_get_lbolt() < quit_at_ticks) {
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                            quit_at_ticks);
                        CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                }

                /* leave with vhc_lock held; it is released at "out" */
                if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
                    vhc->vhc_acc_list_head == NULL)
                        goto out;

                /* unlink the head element */
                acc = vhc->vhc_acc_list_head;
                vhc->vhc_acc_list_head = acc->acc_next;
                if (vhc->vhc_acc_list_head == NULL)
                        vhc->vhc_acc_list_tail = NULL;
                vhc->vhc_acc_count--;
                mutex_exit(&vhc->vhc_lock);

                /* the configuration itself runs without vhc_lock */
                config_client_paths_sync(vhc, acc->acc_ct_name,
                    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
                    &acc->acc_token);

                /* also frees the path list carried by the element */
                free_async_client_config(acc);
        }

out:
        vhc->vhc_acc_thrcount--;
        /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
        CALLB_CPR_EXIT(&cprinfo);
}
8847 
8848 /*
8849  * Arrange for all the phci client paths (pp_head) for the specified client
8850  * to be bus configured asynchronously by a thread.
8851  */
8852 static void
8853 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8854     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8855 {
8856         mdi_async_client_config_t *acc, *newacc;
8857         int create_thread;
8858 
8859         if (pp_head == NULL)
8860                 return;
8861 
8862         if (mdi_mtc_off) {
8863                 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8864                 free_phclient_path_list(pp_head);
8865                 return;
8866         }
8867 
8868         newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8869         ASSERT(newacc);
8870 
8871         mutex_enter(&vhc->vhc_lock);
8872         for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8873                 if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8874                     strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8875                         free_async_client_config(newacc);
8876                         mutex_exit(&vhc->vhc_lock);
8877                         return;
8878                 }
8879         }
8880 
8881         if (vhc->vhc_acc_list_head == NULL)
8882                 vhc->vhc_acc_list_head = newacc;
8883         else
8884                 vhc->vhc_acc_list_tail->acc_next = newacc;
8885         vhc->vhc_acc_list_tail = newacc;
8886         vhc->vhc_acc_count++;
8887         if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8888                 cv_broadcast(&vhc->vhc_cv);
8889                 create_thread = 0;
8890         } else {
8891                 vhc->vhc_acc_thrcount++;
8892                 create_thread = 1;
8893         }
8894         mutex_exit(&vhc->vhc_lock);
8895 
8896         if (create_thread)
8897                 (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8898                     0, &p0, TS_RUN, minclsyspri);
8899 }
8900 
8901 /*
8902  * Return number of online paths for the specified client.
8903  */
8904 static int
8905 nonline_paths(mdi_vhcache_client_t *cct)
8906 {
8907         mdi_vhcache_pathinfo_t *cpi;
8908         int online_count = 0;
8909 
8910         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8911                 if (cpi->cpi_pip != NULL) {
8912                         MDI_PI_LOCK(cpi->cpi_pip);
8913                         if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8914                                 online_count++;
8915                         MDI_PI_UNLOCK(cpi->cpi_pip);
8916                 }
8917         }
8918 
8919         return (online_count);
8920 }
8921 
/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Locking: the caller must hold vhcache_lock on entry (asserted below).
 * The lock is released by this function on every path, so it is no longer
 * held when the function returns.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        mdi_phys_path_t *pp_head, *pp;
        mdi_vhcache_client_t *cct;
        mdi_vhcache_lookup_token_t tok;

        ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

        init_vhcache_lookup_token(&tok, NULL);

        /* nothing to do: bad arguments, unknown client or no pending paths */
        if (ct_name == NULL || ct_addr == NULL ||
            (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
            == NULL ||
            (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /* if at least one path is online, configure the rest asynchronously */
        if (nonline_paths(cct) > 0) {
                rw_exit(&vhcache->vhcache_lock);
                /* pp_head is handed over; it is not freed here */
                config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
                return;
        }

        rw_exit(&vhcache->vhcache_lock);

        /* no path online yet: configure paths one at a time */
        for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
                if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
                        rw_enter(&vhcache->vhcache_lock, RW_READER);

                        /* the lock was dropped, so look the client up again */
                        if ((cct = lookup_vhcache_client(vhcache, ct_name,
                            ct_addr, &tok)) == NULL) {
                                rw_exit(&vhcache->vhcache_lock);
                                goto out;
                        }

                        /*
                         * A path is online now: hand the not yet processed
                         * tail of the list to the async code and cut it off
                         * the local list so that only the processed part is
                         * freed below.
                         */
                        if (nonline_paths(cct) > 0 &&
                            pp->phys_path_next != NULL) {
                                rw_exit(&vhcache->vhcache_lock);
                                config_client_paths_async(vhc, ct_name, ct_addr,
                                    pp->phys_path_next, &tok);
                                pp->phys_path_next = NULL;
                                goto out;
                        }

                        rw_exit(&vhcache->vhcache_lock);
                }
        }

        /* every path was attempted synchronously */
        adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
        free_phclient_path_list(pp_head);
}
8985 
8986 static void
8987 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8988 {
8989         mutex_enter(&vhc->vhc_lock);
8990         while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8991                 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8992         vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8993         mutex_exit(&vhc->vhc_lock);
8994 }
8995 
/*
 * End a single threaded vhci configuration section: clear
 * MDI_VHC_SINGLE_THREADED on the vhci and wake every thread waiting on
 * vhc_cv. Paired with single_threaded_vhconfig_enter().
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
        mutex_enter(&vhc->vhc_lock);
        vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
        cv_broadcast(&vhc->vhc_cv);
        mutex_exit(&vhc->vhc_lock);
}
9004 
/*
 * One entry of a built-in phci driver table.
 */
typedef struct mdi_phci_driver_info {
        /* name of the phci driver */
        char    *phdriver_name;

        /* set to non zero if the phci driver supports root device */
        int     phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
        { "fp", 1 },
        { "iscsi", 0 },
        { "ibsrp", 1 }
};

static mdi_phci_driver_info_t ib_phci_driver_list[] = {
        { "tavor", 1 }
};
9027 
9028 static void *
9029 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9030 {
9031         void *new_ptr;
9032 
9033         new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9034         if (old_ptr) {
9035                 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9036                 kmem_free(old_ptr, old_size);
9037         }
9038         return (new_ptr);
9039 }
9040 
9041 static void
9042 add_to_phci_list(char ***driver_list, int **root_support_list,
9043     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9044 {
9045         ASSERT(*cur_elements <= *max_elements);
9046         if (*cur_elements == *max_elements) {
9047                 *max_elements += 10;
9048                 *driver_list = mdi_realloc(*driver_list,
9049                     sizeof (char *) * (*cur_elements),
9050                     sizeof (char *) * (*max_elements));
9051                 *root_support_list = mdi_realloc(*root_support_list,
9052                     sizeof (int) * (*cur_elements),
9053                     sizeof (int) * (*max_elements));
9054         }
9055         (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9056         (*root_support_list)[*cur_elements] = root_support;
9057         (*cur_elements)++;
9058 }
9059 
9060 static void
9061 get_phci_driver_list(char *vhci_class, char ***driver_list,
9062     int **root_support_list, int *cur_elements, int *max_elements)
9063 {
9064         mdi_phci_driver_info_t  *st_driver_list, *p;
9065         int             st_ndrivers, root_support, i, j, driver_conf_count;
9066         major_t         m;
9067         struct devnames *dnp;
9068         ddi_prop_t      *propp;
9069 
9070         *driver_list = NULL;
9071         *root_support_list = NULL;
9072         *cur_elements = 0;
9073         *max_elements = 0;
9074 
9075         /* add the phci drivers derived from the phci driver.conf files */
9076         for (m = 0; m < devcnt; m++) {
9077                 dnp = &devnamesp[m];
9078 
9079                 if (dnp->dn_flags & DN_PHCI_DRIVER) {
9080                         LOCK_DEV_OPS(&dnp->dn_lock);
9081                         if (dnp->dn_global_prop_ptr != NULL &&
9082                             (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9083                             DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9084                             &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9085                             strcmp(propp->prop_val, vhci_class) == 0) {
9086 
9087                                 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9088                                     DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9089                                     &dnp->dn_global_prop_ptr->prop_list)
9090                                     == NULL) ? 1 : 0;
9091 
9092                                 add_to_phci_list(driver_list, root_support_list,
9093                                     cur_elements, max_elements, dnp->dn_name,
9094                                     root_support);
9095 
9096                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9097                         } else
9098                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9099                 }
9100         }
9101 
9102         driver_conf_count = *cur_elements;
9103 
9104         /* add the phci drivers specified in the built-in tables */
9105         if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9106                 st_driver_list = scsi_phci_driver_list;
9107                 st_ndrivers = sizeof (scsi_phci_driver_list) /
9108                     sizeof (mdi_phci_driver_info_t);
9109         } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9110                 st_driver_list = ib_phci_driver_list;
9111                 st_ndrivers = sizeof (ib_phci_driver_list) /
9112                     sizeof (mdi_phci_driver_info_t);
9113         } else {
9114                 st_driver_list = NULL;
9115                 st_ndrivers = 0;
9116         }
9117 
9118         for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9119                 /* add this phci driver if not already added before */
9120                 for (j = 0; j < driver_conf_count; j++) {
9121                         if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9122                                 break;
9123                 }
9124                 if (j == driver_conf_count) {
9125                         add_to_phci_list(driver_list, root_support_list,
9126                             cur_elements, max_elements, p->phdriver_name,
9127                             p->phdriver_root_support);
9128                 }
9129         }
9130 }
9131 
9132 /*
9133  * Attach the phci driver instances associated with the specified vhci class.
9134  * If root is mounted attach all phci driver instances.
9135  * If root is not mounted, attach the instances of only those phci
9136  * drivers that have the root support.
9137  */
9138 static void
9139 attach_phci_drivers(char *vhci_class)
9140 {
9141         char    **driver_list, **p;
9142         int     *root_support_list;
9143         int     cur_elements, max_elements, i;
9144         major_t m;
9145 
9146         get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9147             &cur_elements, &max_elements);
9148 
9149         for (i = 0; i < cur_elements; i++) {
9150                 if (modrootloaded || root_support_list[i]) {
9151                         m = ddi_name_to_major(driver_list[i]);
9152                         if (m != DDI_MAJOR_T_NONE &&
9153                             ddi_hold_installed_driver(m))
9154                                 ddi_rele_driver(m);
9155                 }
9156         }
9157 
9158         if (driver_list) {
9159                 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9160                         kmem_free(*p, strlen(*p) + 1);
9161                 kmem_free(driver_list, sizeof (char *) * max_elements);
9162                 kmem_free(root_support_list, sizeof (int) * max_elements);
9163         }
9164 }
9165 
9166 /*
9167  * Build vhci cache:
9168  *
9169  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9170  * the phci driver instances. During this process the cache gets built.
9171  *
9172  * Cache is built fully if the root is mounted.
9173  * If the root is not mounted, phci drivers that do not have root support
9174  * are not attached. As a result the cache is built partially. The entries
9175  * in the cache reflect only those phci drivers that have root support.
9176  */
9177 static int
9178 build_vhci_cache(mdi_vhci_t *vh)
9179 {
9180         mdi_vhci_config_t *vhc = vh->vh_config;
9181         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9182 
9183         single_threaded_vhconfig_enter(vhc);
9184 
9185         rw_enter(&vhcache->vhcache_lock, RW_READER);
9186         if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9187                 rw_exit(&vhcache->vhcache_lock);
9188                 single_threaded_vhconfig_exit(vhc);
9189                 return (0);
9190         }
9191         rw_exit(&vhcache->vhcache_lock);
9192 
9193         attach_phci_drivers(vh->vh_class);
9194         bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9195             BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9196 
9197         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9198         vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9199         rw_exit(&vhcache->vhcache_lock);
9200 
9201         single_threaded_vhconfig_exit(vhc);
9202         vhcache_dirty(vhc);
9203         return (1);
9204 }
9205 
9206 /*
9207  * Determine if discovery of paths is needed.
9208  */
9209 static int
9210 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9211 {
9212         int rv = 1;
9213 
9214         mutex_enter(&vhc->vhc_lock);
9215         if (i_ddi_io_initialized() == 0) {
9216                 if (vhc->vhc_path_discovery_boot > 0) {
9217                         vhc->vhc_path_discovery_boot--;
9218                         goto out;
9219                 }
9220         } else {
9221                 if (vhc->vhc_path_discovery_postboot > 0) {
9222                         vhc->vhc_path_discovery_postboot--;
9223                         goto out;
9224                 }
9225         }
9226 
9227         /*
9228          * Do full path discovery at most once per mdi_path_discovery_interval.
9229          * This is to avoid a series of full path discoveries when opening
9230          * stale /dev/[r]dsk links.
9231          */
9232         if (mdi_path_discovery_interval != -1 &&
9233             ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9234                 goto out;
9235 
9236         rv = 0;
9237 out:
9238         mutex_exit(&vhc->vhc_lock);
9239         return (rv);
9240 }
9241 
9242 /*
9243  * Discover all paths:
9244  *
9245  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9246  * driver instances. During this process all paths will be discovered.
9247  */
9248 static int
9249 vhcache_discover_paths(mdi_vhci_t *vh)
9250 {
9251         mdi_vhci_config_t *vhc = vh->vh_config;
9252         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9253         int rv = 0;
9254 
9255         single_threaded_vhconfig_enter(vhc);
9256 
9257         if (vhcache_do_discovery(vhc)) {
9258                 attach_phci_drivers(vh->vh_class);
9259                 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9260                     NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9261 
9262                 mutex_enter(&vhc->vhc_lock);
9263                 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9264                     mdi_path_discovery_interval * TICKS_PER_SECOND;
9265                 mutex_exit(&vhc->vhc_lock);
9266                 rv = 1;
9267         }
9268 
9269         single_threaded_vhconfig_exit(vhc);
9270         return (rv);
9271 }
9272 
9273 /*
9274  * Generic vhci bus config implementation:
9275  *
9276  * Parameters
9277  *      vdip    vhci dip
9278  *      flags   bus config flags
9279  *      op      bus config operation
9280  *      The remaining parameters are bus config operation specific
9281  *
9282  * for BUS_CONFIG_ONE
9283  *      arg     pointer to name@addr
9284  *      child   upon successful return from this function, *child will be
9285  *              set to the configured and held devinfo child node of vdip.
9286  *      ct_addr pointer to client address (i.e. GUID)
9287  *
9288  * for BUS_CONFIG_DRIVER
9289  *      arg     major number of the driver
9290  *      child and ct_addr parameters are ignored
9291  *
9292  * for BUS_CONFIG_ALL
9293  *      arg, child, and ct_addr parameters are ignored
9294  *
9295  * Note that for the rest of the bus config operations, this function simply
9296  * calls the framework provided default bus config routine.
9297  */
9298 int
9299 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9300     void *arg, dev_info_t **child, char *ct_addr)
9301 {
9302         mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9303         mdi_vhci_config_t *vhc = vh->vh_config;
9304         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9305         int rv = 0;
9306         int params_valid = 0;
9307         char *cp;
9308 
9309         /*
9310          * To bus config vhcis we relay operation, possibly using another
9311          * thread, to phcis. The phci driver then interacts with MDI to cause
9312          * vhci child nodes to be enumerated under the vhci node.  Adding a
9313          * vhci child requires an ndi_devi_enter of the vhci. Since another
9314          * thread may be adding the child, to avoid deadlock we can't wait
9315          * for the relayed operations to complete if we have already entered
9316          * the vhci node.
9317          */
9318         if (DEVI_BUSY_OWNED(vdip)) {
9319                 MDI_DEBUG(2, (MDI_NOTE, vdip,
9320                     "vhci dip is busy owned %p", (void *)vdip));
9321                 goto default_bus_config;
9322         }
9323 
9324         rw_enter(&vhcache->vhcache_lock, RW_READER);
9325         if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9326                 rw_exit(&vhcache->vhcache_lock);
9327                 rv = build_vhci_cache(vh);
9328                 rw_enter(&vhcache->vhcache_lock, RW_READER);
9329         }
9330 
9331         switch (op) {
9332         case BUS_CONFIG_ONE:
9333                 if (arg != NULL && ct_addr != NULL) {
9334                         /* extract node name */
9335                         cp = (char *)arg;
9336                         while (*cp != '\0' && *cp != '@')
9337                                 cp++;
9338                         if (*cp == '@') {
9339                                 params_valid = 1;
9340                                 *cp = '\0';
9341                                 config_client_paths(vhc, (char *)arg, ct_addr);
9342                                 /* config_client_paths() releases cache_lock */
9343                                 *cp = '@';
9344                                 break;
9345                         }
9346                 }
9347 
9348                 rw_exit(&vhcache->vhcache_lock);
9349                 break;
9350 
9351         case BUS_CONFIG_DRIVER:
9352                 rw_exit(&vhcache->vhcache_lock);
9353                 if (rv == 0)
9354                         st_bus_config_all_phcis(vhc, flags, op,
9355                             (major_t)(uintptr_t)arg);
9356                 break;
9357 
9358         case BUS_CONFIG_ALL:
9359                 rw_exit(&vhcache->vhcache_lock);
9360                 if (rv == 0)
9361                         st_bus_config_all_phcis(vhc, flags, op, -1);
9362                 break;
9363 
9364         default:
9365                 rw_exit(&vhcache->vhcache_lock);
9366                 break;
9367         }
9368 
9369 
9370 default_bus_config:
9371         /*
9372          * All requested child nodes are enumerated under the vhci.
9373          * Now configure them.
9374          */
9375         if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9376             NDI_SUCCESS) {
9377                 return (MDI_SUCCESS);
9378         } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9379                 /* discover all paths and try configuring again */
9380                 if (vhcache_discover_paths(vh) &&
9381                     ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9382                     NDI_SUCCESS)
9383                         return (MDI_SUCCESS);
9384         }
9385 
9386         return (MDI_FAILURE);
9387 }
9388 
9389 /*
9390  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9391  */
9392 static nvlist_t *
9393 read_on_disk_vhci_cache(char *vhci_class)
9394 {
9395         nvlist_t *nvl;
9396         int err;
9397         char *filename;
9398 
9399         filename = vhclass2vhcache_filename(vhci_class);
9400 
9401         if ((err = fread_nvlist(filename, &nvl)) == 0) {
9402                 kmem_free(filename, strlen(filename) + 1);
9403                 return (nvl);
9404         } else if (err == EIO)
9405                 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9406         else if (err == EINVAL)
9407                 cmn_err(CE_WARN,
9408                     "%s: data file corrupted, will recreate", filename);
9409 
9410         kmem_free(filename, strlen(filename) + 1);
9411         return (NULL);
9412 }
9413 
9414 /*
9415  * Read on-disk vhci cache into nvlists for all vhci classes.
9416  * Called during booting by i_ddi_read_devices_files().
9417  */
9418 void
9419 mdi_read_devices_files(void)
9420 {
9421         int i;
9422 
9423         for (i = 0; i < N_VHCI_CLASSES; i++)
9424                 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9425 }
9426 
9427 /*
9428  * Remove all stale entries from vhci cache.
9429  */
9430 static void
9431 clean_vhcache(mdi_vhci_config_t *vhc)
9432 {
9433         mdi_vhci_cache_t        *vhcache = &vhc->vhc_vhcache;
9434         mdi_vhcache_phci_t      *phci, *nxt_phci;
9435         mdi_vhcache_client_t    *client, *nxt_client;
9436         mdi_vhcache_pathinfo_t  *path, *nxt_path;
9437 
9438         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9439 
9440         client = vhcache->vhcache_client_head;
9441         vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9442         for ( ; client != NULL; client = nxt_client) {
9443                 nxt_client = client->cct_next;
9444 
9445                 path = client->cct_cpi_head;
9446                 client->cct_cpi_head = client->cct_cpi_tail = NULL;
9447                 for ( ; path != NULL; path = nxt_path) {
9448                         nxt_path = path->cpi_next;
9449                         if ((path->cpi_cphci->cphci_phci != NULL) &&
9450                             (path->cpi_pip != NULL)) {
9451                                 enqueue_tail_vhcache_pathinfo(client, path);
9452                         } else if (path->cpi_pip != NULL) {
9453                                 /* Not valid to have a path without a phci. */
9454                                 free_vhcache_pathinfo(path);
9455                         }
9456                 }
9457 
9458                 if (client->cct_cpi_head != NULL)
9459                         enqueue_vhcache_client(vhcache, client);
9460                 else {
9461                         (void) mod_hash_destroy(vhcache->vhcache_client_hash,
9462                             (mod_hash_key_t)client->cct_name_addr);
9463                         free_vhcache_client(client);
9464                 }
9465         }
9466 
9467         phci = vhcache->vhcache_phci_head;
9468         vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9469         for ( ; phci != NULL; phci = nxt_phci) {
9470 
9471                 nxt_phci = phci->cphci_next;
9472                 if (phci->cphci_phci != NULL)
9473                         enqueue_vhcache_phci(vhcache, phci);
9474                 else
9475                         free_vhcache_phci(phci);
9476         }
9477 
9478         vhcache->vhcache_clean_time = ddi_get_lbolt64();
9479         rw_exit(&vhcache->vhcache_lock);
9480         vhcache_dirty(vhc);
9481 }
9482 
9483 /*
9484  * Remove all stale entries from vhci cache.
9485  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9486  */
9487 void
9488 mdi_clean_vhcache(void)
9489 {
9490         mdi_vhci_t *vh;
9491 
9492         mutex_enter(&mdi_mutex);
9493         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9494                 vh->vh_refcnt++;
9495                 mutex_exit(&mdi_mutex);
9496                 clean_vhcache(vh->vh_config);
9497                 mutex_enter(&mdi_mutex);
9498                 vh->vh_refcnt--;
9499         }
9500         mutex_exit(&mdi_mutex);
9501 }
9502 
9503 /*
9504  * mdi_vhci_walk_clients():
9505  *              Walker routine to traverse client dev_info nodes
9506  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9507  * below the client, including nexus devices, which we dont want.
9508  * So we just traverse the immediate siblings, starting from 1st client.
9509  */
9510 void
9511 mdi_vhci_walk_clients(dev_info_t *vdip,
9512     int (*f)(dev_info_t *, void *), void *arg)
9513 {
9514         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9515         dev_info_t      *cdip;
9516         mdi_client_t    *ct;
9517 
9518         MDI_VHCI_CLIENT_LOCK(vh);
9519         cdip = ddi_get_child(vdip);
9520         while (cdip) {
9521                 ct = i_devi_get_client(cdip);
9522                 MDI_CLIENT_LOCK(ct);
9523 
9524                 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9525                         cdip = ddi_get_next_sibling(cdip);
9526                 else
9527                         cdip = NULL;
9528 
9529                 MDI_CLIENT_UNLOCK(ct);
9530         }
9531         MDI_VHCI_CLIENT_UNLOCK(vh);
9532 }
9533 
9534 /*
9535  * mdi_vhci_walk_phcis():
9536  *              Walker routine to traverse phci dev_info nodes
9537  */
9538 void
9539 mdi_vhci_walk_phcis(dev_info_t *vdip,
9540     int (*f)(dev_info_t *, void *), void *arg)
9541 {
9542         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9543         mdi_phci_t      *ph, *next;
9544 
9545         MDI_VHCI_PHCI_LOCK(vh);
9546         ph = vh->vh_phci_head;
9547         while (ph) {
9548                 MDI_PHCI_LOCK(ph);
9549 
9550                 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9551                         next = ph->ph_next;
9552                 else
9553                         next = NULL;
9554 
9555                 MDI_PHCI_UNLOCK(ph);
9556                 ph = next;
9557         }
9558         MDI_VHCI_PHCI_UNLOCK(vh);
9559 }
9560 
9561 
9562 /*
9563  * mdi_walk_vhcis():
9564  *              Walker routine to traverse vhci dev_info nodes
9565  */
9566 void
9567 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9568 {
9569         mdi_vhci_t      *vh = NULL;
9570 
9571         mutex_enter(&mdi_mutex);
9572         /*
9573          * Scan for already registered vhci
9574          */
9575         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9576                 vh->vh_refcnt++;
9577                 mutex_exit(&mdi_mutex);
9578                 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9579                         mutex_enter(&mdi_mutex);
9580                         vh->vh_refcnt--;
9581                         break;
9582                 } else {
9583                         mutex_enter(&mdi_mutex);
9584                         vh->vh_refcnt--;
9585                 }
9586         }
9587 
9588         mutex_exit(&mdi_mutex);
9589 }
9590 
9591 /*
9592  * i_mdi_log_sysevent():
9593  *              Logs events for pickup by syseventd
9594  */
9595 static void
9596 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9597 {
9598         char            *path_name;
9599         nvlist_t        *attr_list;
9600 
9601         if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9602             KM_SLEEP) != DDI_SUCCESS) {
9603                 goto alloc_failed;
9604         }
9605 
9606         path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9607         (void) ddi_pathname(dip, path_name);
9608 
9609         if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9610             ddi_driver_name(dip)) != DDI_SUCCESS) {
9611                 goto error;
9612         }
9613 
9614         if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9615             (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9616                 goto error;
9617         }
9618 
9619         if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9620             (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9621                 goto error;
9622         }
9623 
9624         if (nvlist_add_string(attr_list, DDI_PATHNAME,
9625             path_name) != DDI_SUCCESS) {
9626                 goto error;
9627         }
9628 
9629         if (nvlist_add_string(attr_list, DDI_CLASS,
9630             ph_vh_class) != DDI_SUCCESS) {
9631                 goto error;
9632         }
9633 
9634         (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9635             attr_list, NULL, DDI_SLEEP);
9636 
9637 error:
9638         kmem_free(path_name, MAXPATHLEN);
9639         nvlist_free(attr_list);
9640         return;
9641 
9642 alloc_failed:
9643         MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9644 }
9645 
9646 char **
9647 mdi_get_phci_driver_list(char *vhci_class, int  *ndrivers)
9648 {
9649         char    **driver_list, **ret_driver_list = NULL;
9650         int     *root_support_list;
9651         int     cur_elements, max_elements;
9652 
9653         get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9654             &cur_elements, &max_elements);
9655 
9656 
9657         if (driver_list) {
9658                 kmem_free(root_support_list, sizeof (int) * max_elements);
9659                 ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9660                     * max_elements, sizeof (char *) * cur_elements);
9661         }
9662         *ndrivers = cur_elements;
9663 
9664         return (ret_driver_list);
9665 
9666 }
9667 
9668 void
9669 mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9670 {
9671         char    **p;
9672         int     i;
9673 
9674         if (driver_list) {
9675                 for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9676                         kmem_free(*p, strlen(*p) + 1);
9677                 kmem_free(driver_list, sizeof (char *) * ndrivers);
9678         }
9679 }
9680 
9681 /*
9682  * mdi_is_dev_supported():
9683  *              function called by pHCI bus config operation to determine if a
9684  *              device should be represented as a child of the vHCI or the
9685  *              pHCI.  This decision is made by the vHCI, using cinfo idenity
9686  *              information passed by the pHCI - specifics of the cinfo
9687  *              representation are by agreement between the pHCI and vHCI.
9688  * Return Values:
9689  *              MDI_SUCCESS
9690  *              MDI_FAILURE
9691  */
9692 int
9693 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9694 {
9695         mdi_vhci_t      *vh;
9696 
9697         ASSERT(class && pdip);
9698 
9699         /*
9700          * For dev_supported, mdi_phci_register() must have established pdip as
9701          * a pHCI.
9702          *
9703          * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9704          * MDI_PHCI(pdip) will return false if mpxio is disabled.
9705          */
9706         if (!MDI_PHCI(pdip))
9707                 return (MDI_FAILURE);
9708 
9709         /* Return MDI_FAILURE if vHCI does not support asking the question. */
9710         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9711         if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9712                 return (MDI_FAILURE);
9713         }
9714 
9715         /* Return vHCI answer */
9716         return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9717 }
9718 
9719 int
9720 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9721 {
9722         uint_t devstate = 0;
9723         dev_info_t *cdip;
9724 
9725         if ((pip == NULL) || (dcp == NULL))
9726                 return (MDI_FAILURE);
9727 
9728         cdip = mdi_pi_get_client(pip);
9729 
9730         switch (mdi_pi_get_state(pip)) {
9731         case MDI_PATHINFO_STATE_INIT:
9732                 devstate = DEVICE_DOWN;
9733                 break;
9734         case MDI_PATHINFO_STATE_ONLINE:
9735                 devstate = DEVICE_ONLINE;
9736                 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9737                         devstate |= DEVICE_BUSY;
9738                 break;
9739         case MDI_PATHINFO_STATE_STANDBY:
9740                 devstate = DEVICE_ONLINE;
9741                 break;
9742         case MDI_PATHINFO_STATE_FAULT:
9743                 devstate = DEVICE_DOWN;
9744                 break;
9745         case MDI_PATHINFO_STATE_OFFLINE:
9746                 devstate = DEVICE_OFFLINE;
9747                 break;
9748         default:
9749                 ASSERT(MDI_PI(pip)->pi_state);
9750         }
9751 
9752         if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9753                 return (MDI_FAILURE);
9754 
9755         return (MDI_SUCCESS);
9756 }