1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Nexenta Systems, Inc.
  25  */
  26 
  27 /*
  28  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
  29  * more detailed discussion of the overall mpxio architecture.
  30  */
  31 
  32 #include <sys/note.h>
  33 #include <sys/types.h>
  34 #include <sys/varargs.h>
  35 #include <sys/param.h>
  36 #include <sys/errno.h>
  37 #include <sys/uio.h>
  38 #include <sys/buf.h>
  39 #include <sys/modctl.h>
  40 #include <sys/open.h>
  41 #include <sys/kmem.h>
  42 #include <sys/poll.h>
  43 #include <sys/conf.h>
  44 #include <sys/bootconf.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/stat.h>
  47 #include <sys/ddi.h>
  48 #include <sys/sunddi.h>
  49 #include <sys/ddipropdefs.h>
  50 #include <sys/sunndi.h>
  51 #include <sys/ndi_impldefs.h>
  52 #include <sys/promif.h>
  53 #include <sys/sunmdi.h>
  54 #include <sys/mdi_impldefs.h>
  55 #include <sys/taskq.h>
  56 #include <sys/epm.h>
  57 #include <sys/sunpm.h>
  58 #include <sys/modhash.h>
  59 #include <sys/disp.h>
  60 #include <sys/autoconf.h>
  61 #include <sys/sysmacros.h>
  62 
  63 #ifdef  DEBUG
  64 #include <sys/debug.h>
  65 int     mdi_debug = 1;
  66 int     mdi_debug_logonly = 0;
  67 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))      i_mdi_log pargs
  68 #define MDI_WARN        CE_WARN, __func__
  69 #define MDI_NOTE        CE_NOTE, __func__
  70 #define MDI_CONT        CE_CONT, __func__
  71 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
  72 #else   /* !DEBUG */
  73 #define MDI_DEBUG(dbglevel, pargs)
  74 #endif  /* DEBUG */
  75 int     mdi_debug_consoleonly = 0;
  76 int     mdi_delay = 3;
  77 
  78 extern pri_t    minclsyspri;
  79 extern int      modrootloaded;
  80 
  81 /*
  82  * Global mutex:
  83  * Protects vHCI list and structure members.
  84  */
  85 kmutex_t        mdi_mutex;
  86 
  87 /*
  88  * Registered vHCI class driver lists
  89  */
  90 int             mdi_vhci_count;
  91 mdi_vhci_t      *mdi_vhci_head;
  92 mdi_vhci_t      *mdi_vhci_tail;
  93 
  94 /*
  95  * Client Hash Table size
  96  */
  97 static int      mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
  98 
  99 /*
 100  * taskq interface definitions
 101  */
 102 #define MDI_TASKQ_N_THREADS     8
 103 #define MDI_TASKQ_PRI           minclsyspri
 104 #define MDI_TASKQ_MINALLOC      (4*mdi_taskq_n_threads)
 105 #define MDI_TASKQ_MAXALLOC      (500*mdi_taskq_n_threads)
 106 
 107 taskq_t                         *mdi_taskq;
 108 static uint_t                   mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
 109 
 110 #define TICKS_PER_SECOND        (drv_usectohz(1000000))
 111 
 112 /*
 113  * The data should be "quiet" for this interval (in seconds) before the
 114  * vhci cached data is flushed to the disk.
 115  */
 116 static int mdi_vhcache_flush_delay = 10;
 117 
 118 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
 119 static int mdi_vhcache_flush_daemon_idle_time = 60;
 120 
 121 /*
 122  * MDI falls back to discovery of all paths when a bus_config_one fails.
 123  * The following parameters can be used to tune this operation.
 124  *
 125  * mdi_path_discovery_boot
 126  *      Number of times path discovery will be attempted during early boot.
 127  *      Probably there is no reason to ever set this value to greater than one.
 128  *
 129  * mdi_path_discovery_postboot
 130  *      Number of times path discovery will be attempted after early boot.
 131  *      Set it to a minimum of two to allow for discovery of iscsi paths which
 132  *      may happen very late during booting.
 133  *
 134  * mdi_path_discovery_interval
 135  *      Minimum number of seconds MDI will wait between successive discovery
 136  *      of all paths. Set it to -1 to disable discovery of all paths.
 137  */
 138 static int mdi_path_discovery_boot = 1;
 139 static int mdi_path_discovery_postboot = 2;
 140 static int mdi_path_discovery_interval = 10;
 141 
 142 /*
 143  * number of seconds the asynchronous configuration thread will sleep idle
 144  * before exiting.
 145  */
 146 static int mdi_async_config_idle_time = 600;
 147 
 148 static int mdi_bus_config_cache_hash_size = 256;
 149 
 150 /* turns off multithreaded configuration for certain operations */
 151 static int mdi_mtc_off = 0;
 152 
 153 /*
 154  * The "path" to a pathinfo node is identical to the /devices path to a
 155  * devinfo node had the device been enumerated under a pHCI instead of
 156  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 157  * This association persists across create/delete of the pathinfo nodes,
 158  * but not across reboot.
 159  */
 160 static uint_t           mdi_pathmap_instance = 1;       /* 0 -> any path */
 161 static int              mdi_pathmap_hash_size = 256;
 162 static kmutex_t         mdi_pathmap_mutex;
 163 static mod_hash_t       *mdi_pathmap_bypath;            /* "path"->instance */
 164 static mod_hash_t       *mdi_pathmap_byinstance;        /* instance->"path" */
 165 static mod_hash_t       *mdi_pathmap_sbyinstance;       /* inst->shortpath */
 166 
 167 /*
 168  * MDI component property name/value string definitions
 169  */
 170 const char              *mdi_component_prop = "mpxio-component";
 171 const char              *mdi_component_prop_vhci = "vhci";
 172 const char              *mdi_component_prop_phci = "phci";
 173 const char              *mdi_component_prop_client = "client";
 174 
 175 /*
 176  * MDI client global unique identifier property name
 177  */
 178 const char              *mdi_client_guid_prop = "client-guid";
 179 
 180 /*
 181  * MDI client load balancing property name/value string definitions
 182  */
 183 const char              *mdi_load_balance = "load-balance";
 184 const char              *mdi_load_balance_none = "none";
 185 const char              *mdi_load_balance_rr = "round-robin";
 186 const char              *mdi_load_balance_lba = "logical-block";
 187 
 188 /*
 189  * Obsolete vHCI class definition; to be removed after Leadville update
 190  */
 191 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
 192 
 193 static char vhci_greeting[] =
 194         "\tThere already exists one vHCI driver for class %s\n"
 195         "\tOnly one vHCI driver for each class is allowed\n";
 196 
 197 /*
 198  * Static function prototypes
 199  */
 200 static int              i_mdi_phci_offline(dev_info_t *, uint_t);
 201 static int              i_mdi_client_offline(dev_info_t *, uint_t);
 202 static int              i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
 203 static void             i_mdi_phci_post_detach(dev_info_t *,
 204                             ddi_detach_cmd_t, int);
 205 static int              i_mdi_client_pre_detach(dev_info_t *,
 206                             ddi_detach_cmd_t);
 207 static void             i_mdi_client_post_detach(dev_info_t *,
 208                             ddi_detach_cmd_t, int);
 209 static void             i_mdi_pm_hold_pip(mdi_pathinfo_t *);
 210 static void             i_mdi_pm_rele_pip(mdi_pathinfo_t *);
 211 static int              i_mdi_lba_lb(mdi_client_t *ct,
 212                             mdi_pathinfo_t **ret_pip, struct buf *buf);
 213 static void             i_mdi_pm_hold_client(mdi_client_t *, int);
 214 static void             i_mdi_pm_rele_client(mdi_client_t *, int);
 215 static void             i_mdi_pm_reset_client(mdi_client_t *);
 216 static int              i_mdi_power_all_phci(mdi_client_t *);
 217 static void             i_mdi_log_sysevent(dev_info_t *, char *, char *);
 218 
 219 
 220 /*
 221  * Internal mdi_pathinfo node functions
 222  */
 223 static void             i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
 224 
 225 static mdi_vhci_t       *i_mdi_vhci_class2vhci(char *);
 226 static mdi_vhci_t       *i_devi_get_vhci(dev_info_t *);
 227 static mdi_phci_t       *i_devi_get_phci(dev_info_t *);
 228 static void             i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
 229 static void             i_mdi_phci_unlock(mdi_phci_t *);
 230 static mdi_pathinfo_t   *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
 231 static void             i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
 232 static void             i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
 233 static void             i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
 234                             mdi_client_t *);
 235 static void             i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
 236 static void             i_mdi_client_remove_path(mdi_client_t *,
 237                             mdi_pathinfo_t *);
 238 
 239 static int              i_mdi_pi_state_change(mdi_pathinfo_t *,
 240                             mdi_pathinfo_state_t, int);
 241 static int              i_mdi_pi_offline(mdi_pathinfo_t *, int);
 242 static dev_info_t       *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
 243                             char **, int);
 244 static dev_info_t       *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
 245 static int              i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
 246 static int              i_mdi_is_child_present(dev_info_t *, dev_info_t *);
 247 static mdi_client_t     *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
 248 static void             i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
 249 static void             i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
 250 static mdi_client_t     *i_mdi_client_find(mdi_vhci_t *, char *, char *);
 251 static void             i_mdi_client_update_state(mdi_client_t *);
 252 static int              i_mdi_client_compute_state(mdi_client_t *,
 253                             mdi_phci_t *);
 254 static void             i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
 255 static void             i_mdi_client_unlock(mdi_client_t *);
 256 static int              i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
 257 static mdi_client_t     *i_devi_get_client(dev_info_t *);
 258 /*
 259  * NOTE: this will be removed once the NWS files are changed to use the new
 260  * mdi_{enable,disable}_path interfaces
 261  */
 262 static int              i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
 263                                 int, int);
 264 static mdi_pathinfo_t   *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
 265                                 mdi_vhci_t *vh, int flags, int op);
 266 /*
 267  * Failover related function prototypes
 268  */
 269 static int              i_mdi_failover(void *);
 270 
 271 /*
 272  * misc internal functions
 273  */
 274 static int              i_mdi_get_hash_key(char *);
 275 static int              i_map_nvlist_error_to_mdi(int);
 276 static void             i_mdi_report_path_state(mdi_client_t *,
 277                             mdi_pathinfo_t *);
 278 
 279 static void             setup_vhci_cache(mdi_vhci_t *);
 280 static int              destroy_vhci_cache(mdi_vhci_t *);
 281 static int              stop_vhcache_async_threads(mdi_vhci_config_t *);
 282 static boolean_t        stop_vhcache_flush_thread(void *, int);
 283 static void             free_string_array(char **, int);
 284 static void             free_vhcache_phci(mdi_vhcache_phci_t *);
 285 static void             free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
 286 static void             free_vhcache_client(mdi_vhcache_client_t *);
 287 static int              mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
 288 static nvlist_t         *vhcache_to_mainnvl(mdi_vhci_cache_t *);
 289 static void             vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
 290 static void             vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
 291 static void             vhcache_pi_add(mdi_vhci_config_t *,
 292                             struct mdi_pathinfo *);
 293 static void             vhcache_pi_remove(mdi_vhci_config_t *,
 294                             struct mdi_pathinfo *);
 295 static void             free_phclient_path_list(mdi_phys_path_t *);
 296 static void             sort_vhcache_paths(mdi_vhcache_client_t *);
 297 static int              flush_vhcache(mdi_vhci_config_t *, int);
 298 static void             vhcache_dirty(mdi_vhci_config_t *);
 299 static void             free_async_client_config(mdi_async_client_config_t *);
 300 static void             single_threaded_vhconfig_enter(mdi_vhci_config_t *);
 301 static void             single_threaded_vhconfig_exit(mdi_vhci_config_t *);
 302 static nvlist_t         *read_on_disk_vhci_cache(char *);
 303 extern int              fread_nvlist(char *, nvlist_t **);
 304 extern int              fwrite_nvlist(char *, nvlist_t *);
 305 
 306 /* called once when first vhci registers with mdi */
 307 static void
 308 i_mdi_init()
 309 {
 310         static int initialized = 0;
 311 
 312         if (initialized)
 313                 return;
 314         initialized = 1;
 315 
 316         mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
 317 
 318         /* Create our taskq resources */
 319         mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
 320             MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
 321             TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
 322         ASSERT(mdi_taskq != NULL);      /* taskq_create never fails */
 323 
 324         /* Allocate ['path_instance' <-> "path"] maps */
 325         mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
 326         mdi_pathmap_bypath = mod_hash_create_strhash(
 327             "mdi_pathmap_bypath", mdi_pathmap_hash_size,
 328             mod_hash_null_valdtor);
 329         mdi_pathmap_byinstance = mod_hash_create_idhash(
 330             "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
 331             mod_hash_null_valdtor);
 332         mdi_pathmap_sbyinstance = mod_hash_create_idhash(
 333             "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
 334             mod_hash_null_valdtor);
 335 }
 336 
 337 /*
 338  * mdi_get_component_type():
 339  *              Return mpxio component type
 340  * Return Values:
 341  *              MDI_COMPONENT_NONE
 342  *              MDI_COMPONENT_VHCI
 343  *              MDI_COMPONENT_PHCI
 344  *              MDI_COMPONENT_CLIENT
 345  * XXX This doesn't work under multi-level MPxIO and should be
 346  *      removed when clients migrate mdi_component_is_*() interfaces.
 347  */
 348 int
 349 mdi_get_component_type(dev_info_t *dip)
 350 {
 351         return (DEVI(dip)->devi_mdi_component);
 352 }
 353 
 354 /*
 355  * mdi_vhci_register():
 356  *              Register a vHCI module with the mpxio framework
 357  *              mdi_vhci_register() is called by vHCI drivers to register the
 358  *              'class_driver' vHCI driver and its MDI entrypoints with the
 359  *              mpxio framework.  The vHCI driver must call this interface as
 360  *              part of its attach(9e) handler.
 361  *              Competing threads may try to attach mdi_vhci_register() as
 362  *              the vHCI drivers are loaded and attached as a result of pHCI
 363  *              driver instance registration (mdi_phci_register()) with the
 364  *              framework.
 365  * Return Values:
 366  *              MDI_SUCCESS
 367  *              MDI_FAILURE
 368  */
 369 /*ARGSUSED*/
 370 int
 371 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
 372     int flags)
 373 {
 374         mdi_vhci_t              *vh = NULL;
 375 
 376         /* Registrant can't be older */
 377         ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
 378 
 379 #ifdef DEBUG
 380         /*
 381          * IB nexus driver is loaded only when IB hardware is present.
 382          * In order to be able to do this there is a need to drive the loading
 383          * and attaching of the IB nexus driver (especially when an IB hardware
 384          * is dynamically plugged in) when an IB HCA driver (PHCI)
 385          * is being attached. Unfortunately this gets into the limitations
 386          * of devfs as there seems to be no clean way to drive configuration
 387          * of a subtree from another subtree of a devfs. Hence, do not ASSERT
 388          * for IB.
 389          */
 390         if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
 391                 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 392 #endif
 393 
 394         i_mdi_init();
 395 
 396         mutex_enter(&mdi_mutex);
 397         /*
 398          * Scan for already registered vhci
 399          */
 400         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 401                 if (strcmp(vh->vh_class, class) == 0) {
 402                         /*
 403                          * vHCI has already been created.  Check for valid
 404                          * vHCI ops registration.  We only support one vHCI
 405                          * module per class
 406                          */
 407                         if (vh->vh_ops != NULL) {
 408                                 mutex_exit(&mdi_mutex);
 409                                 cmn_err(CE_NOTE, vhci_greeting, class);
 410                                 return (MDI_FAILURE);
 411                         }
 412                         break;
 413                 }
 414         }
 415 
 416         /*
 417          * if not yet created, create the vHCI component
 418          */
 419         if (vh == NULL) {
 420                 struct client_hash      *hash = NULL;
 421                 char                    *load_balance;
 422 
 423                 /*
 424                  * Allocate and initialize the mdi extensions
 425                  */
 426                 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
 427                 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
 428                     KM_SLEEP);
 429                 vh->vh_client_table = hash;
 430                 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
 431                 (void) strcpy(vh->vh_class, class);
 432                 vh->vh_lb = LOAD_BALANCE_RR;
 433                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
 434                     0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
 435                         if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
 436                                 vh->vh_lb = LOAD_BALANCE_NONE;
 437                         } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
 438                                     == 0) {
 439                                 vh->vh_lb = LOAD_BALANCE_LBA;
 440                         }
 441                         ddi_prop_free(load_balance);
 442                 }
 443 
 444                 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
 445                 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
 446 
 447                 /*
 448                  * Store the vHCI ops vectors
 449                  */
 450                 vh->vh_dip = vdip;
 451                 vh->vh_ops = vops;
 452 
 453                 setup_vhci_cache(vh);
 454 
 455                 if (mdi_vhci_head == NULL) {
 456                         mdi_vhci_head = vh;
 457                 }
 458                 if (mdi_vhci_tail) {
 459                         mdi_vhci_tail->vh_next = vh;
 460                 }
 461                 mdi_vhci_tail = vh;
 462                 mdi_vhci_count++;
 463         }
 464 
 465         /*
 466          * Claim the devfs node as a vhci component
 467          */
 468         DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
 469 
 470         /*
 471          * Initialize our back reference from dev_info node
 472          */
 473         DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
 474         mutex_exit(&mdi_mutex);
 475         return (MDI_SUCCESS);
 476 }
 477 
 478 /*
 479  * mdi_vhci_unregister():
 480  *              Unregister a vHCI module from mpxio framework
 481  *              mdi_vhci_unregister() is called from the detach(9E) entrypoint
 482  *              of a vhci to unregister it from the framework.
 483  * Return Values:
 484  *              MDI_SUCCESS
 485  *              MDI_FAILURE
 486  */
 487 /*ARGSUSED*/
 488 int
 489 mdi_vhci_unregister(dev_info_t *vdip, int flags)
 490 {
 491         mdi_vhci_t      *found, *vh, *prev = NULL;
 492 
 493         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 494 
 495         /*
 496          * Check for invalid VHCI
 497          */
 498         if ((vh = i_devi_get_vhci(vdip)) == NULL)
 499                 return (MDI_FAILURE);
 500 
 501         /*
 502          * Scan the list of registered vHCIs for a match
 503          */
 504         mutex_enter(&mdi_mutex);
 505         for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
 506                 if (found == vh)
 507                         break;
 508                 prev = found;
 509         }
 510 
 511         if (found == NULL) {
 512                 mutex_exit(&mdi_mutex);
 513                 return (MDI_FAILURE);
 514         }
 515 
 516         /*
 517          * Check the vHCI, pHCI and client count. All the pHCIs and clients
 518          * should have been unregistered, before a vHCI can be
 519          * unregistered.
 520          */
 521         MDI_VHCI_PHCI_LOCK(vh);
 522         if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
 523                 MDI_VHCI_PHCI_UNLOCK(vh);
 524                 mutex_exit(&mdi_mutex);
 525                 return (MDI_FAILURE);
 526         }
 527         MDI_VHCI_PHCI_UNLOCK(vh);
 528 
 529         if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
 530                 mutex_exit(&mdi_mutex);
 531                 return (MDI_FAILURE);
 532         }
 533 
 534         /*
 535          * Remove the vHCI from the global list
 536          */
 537         if (vh == mdi_vhci_head) {
 538                 mdi_vhci_head = vh->vh_next;
 539         } else {
 540                 prev->vh_next = vh->vh_next;
 541         }
 542         if (vh == mdi_vhci_tail) {
 543                 mdi_vhci_tail = prev;
 544         }
 545         mdi_vhci_count--;
 546         mutex_exit(&mdi_mutex);
 547 
 548         vh->vh_ops = NULL;
 549         DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
 550         DEVI(vdip)->devi_mdi_xhci = NULL;
 551         kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
 552         kmem_free(vh->vh_client_table,
 553             mdi_client_table_size * sizeof (struct client_hash));
 554         mutex_destroy(&vh->vh_phci_mutex);
 555         mutex_destroy(&vh->vh_client_mutex);
 556 
 557         kmem_free(vh, sizeof (mdi_vhci_t));
 558         return (MDI_SUCCESS);
 559 }
 560 
 561 /*
 562  * i_mdi_vhci_class2vhci():
 563  *              Look for a matching vHCI module given a vHCI class name
 564  * Return Values:
 565  *              Handle to a vHCI component
 566  *              NULL
 567  */
 568 static mdi_vhci_t *
 569 i_mdi_vhci_class2vhci(char *class)
 570 {
 571         mdi_vhci_t      *vh = NULL;
 572 
 573         ASSERT(!MUTEX_HELD(&mdi_mutex));
 574 
 575         mutex_enter(&mdi_mutex);
 576         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 577                 if (strcmp(vh->vh_class, class) == 0) {
 578                         break;
 579                 }
 580         }
 581         mutex_exit(&mdi_mutex);
 582         return (vh);
 583 }
 584 
 585 /*
 586  * i_devi_get_vhci():
 587  *              Utility function to get the handle to a vHCI component
 588  * Return Values:
 589  *              Handle to a vHCI component
 590  *              NULL
 591  */
 592 mdi_vhci_t *
 593 i_devi_get_vhci(dev_info_t *vdip)
 594 {
 595         mdi_vhci_t      *vh = NULL;
 596         if (MDI_VHCI(vdip)) {
 597                 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
 598         }
 599         return (vh);
 600 }
 601 
 602 /*
 603  * mdi_phci_register():
 604  *              Register a pHCI module with mpxio framework
 605  *              mdi_phci_register() is called by pHCI drivers to register with
 606  *              the mpxio framework and a specific 'class_driver' vHCI.  The
 607  *              pHCI driver must call this interface as part of its attach(9e)
 608  *              handler.
 609  * Return Values:
 610  *              MDI_SUCCESS
 611  *              MDI_FAILURE
 612  */
 613 /*ARGSUSED*/
 614 int
 615 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
 616 {
 617         mdi_phci_t              *ph;
 618         mdi_vhci_t              *vh;
 619         char                    *data;
 620 
 621         /*
 622          * Some subsystems, like fcp, perform pHCI registration from a
 623          * different thread than the one doing the pHCI attach(9E) - the
 624          * driver attach code is waiting for this other thread to complete.
 625          * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
 626          * (indicating that some thread has done an ndi_devi_enter of parent)
 627          * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
 628          */
 629         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 630 
 631         /*
 632          * Check for mpxio-disable property. Enable mpxio if the property is
 633          * missing or not set to "yes".
 634          * If the property is set to "yes" then emit a brief message.
 635          */
 636         if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
 637             &data) == DDI_SUCCESS)) {
 638                 if (strcmp(data, "yes") == 0) {
 639                         MDI_DEBUG(1, (MDI_CONT, pdip,
 640                             "?multipath capabilities disabled via %s.conf.",
 641                             ddi_driver_name(pdip)));
 642                         ddi_prop_free(data);
 643                         return (MDI_FAILURE);
 644                 }
 645                 ddi_prop_free(data);
 646         }
 647 
 648         /*
 649          * Search for a matching vHCI
 650          */
 651         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
 652         if (vh == NULL) {
 653                 return (MDI_FAILURE);
 654         }
 655 
 656         ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
 657         mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
 658         ph->ph_dip = pdip;
 659         ph->ph_vhci = vh;
 660         ph->ph_next = NULL;
 661         ph->ph_unstable = 0;
 662         ph->ph_vprivate = 0;
 663         cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
 664 
 665         MDI_PHCI_LOCK(ph);
 666         MDI_PHCI_SET_POWER_UP(ph);
 667         MDI_PHCI_UNLOCK(ph);
 668         DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
 669         DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
 670 
 671         vhcache_phci_add(vh->vh_config, ph);
 672 
 673         MDI_VHCI_PHCI_LOCK(vh);
 674         if (vh->vh_phci_head == NULL) {
 675                 vh->vh_phci_head = ph;
 676         }
 677         if (vh->vh_phci_tail) {
 678                 vh->vh_phci_tail->ph_next = ph;
 679         }
 680         vh->vh_phci_tail = ph;
 681         vh->vh_phci_count++;
 682         MDI_VHCI_PHCI_UNLOCK(vh);
 683 
 684         i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
 685         return (MDI_SUCCESS);
 686 }
 687 
 688 /*
 689  * mdi_phci_unregister():
 690  *              Unregister a pHCI module from mpxio framework
 691  *              mdi_phci_unregister() is called by the pHCI drivers from their
 692  *              detach(9E) handler to unregister their instances from the
 693  *              framework.
 694  * Return Values:
 695  *              MDI_SUCCESS
 696  *              MDI_FAILURE
 697  */
 698 /*ARGSUSED*/
 699 int
 700 mdi_phci_unregister(dev_info_t *pdip, int flags)
 701 {
 702         mdi_vhci_t              *vh;
 703         mdi_phci_t              *ph;
 704         mdi_phci_t              *tmp;
 705         mdi_phci_t              *prev = NULL;
 706         mdi_pathinfo_t          *pip;
 707 
 708         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 709 
 710         ph = i_devi_get_phci(pdip);
 711         if (ph == NULL) {
 712                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
 713                 return (MDI_FAILURE);
 714         }
 715 
 716         vh = ph->ph_vhci;
 717         ASSERT(vh != NULL);
 718         if (vh == NULL) {
 719                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
 720                 return (MDI_FAILURE);
 721         }
 722 
 723         MDI_VHCI_PHCI_LOCK(vh);
 724         tmp = vh->vh_phci_head;
 725         while (tmp) {
 726                 if (tmp == ph) {
 727                         break;
 728                 }
 729                 prev = tmp;
 730                 tmp = tmp->ph_next;
 731         }
 732 
 733         if (ph == vh->vh_phci_head) {
 734                 vh->vh_phci_head = ph->ph_next;
 735         } else {
 736                 prev->ph_next = ph->ph_next;
 737         }
 738 
 739         if (ph == vh->vh_phci_tail) {
 740                 vh->vh_phci_tail = prev;
 741         }
 742 
 743         vh->vh_phci_count--;
 744         MDI_VHCI_PHCI_UNLOCK(vh);
 745 
 746         /* Walk remaining pathinfo nodes and disassociate them from pHCI */
 747         MDI_PHCI_LOCK(ph);
 748         for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
 749             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
 750                 MDI_PI(pip)->pi_phci = NULL;
 751         MDI_PHCI_UNLOCK(ph);
 752 
 753         i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
 754             ESC_DDI_INITIATOR_UNREGISTER);
 755         vhcache_phci_remove(vh->vh_config, ph);
 756         cv_destroy(&ph->ph_unstable_cv);
 757         mutex_destroy(&ph->ph_mutex);
 758         kmem_free(ph, sizeof (mdi_phci_t));
 759         DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
 760         DEVI(pdip)->devi_mdi_xhci = NULL;
 761         return (MDI_SUCCESS);
 762 }
 763 
 764 /*
 765  * i_devi_get_phci():
 766  *              Utility function to return the phci extensions.
 767  */
 768 static mdi_phci_t *
 769 i_devi_get_phci(dev_info_t *pdip)
 770 {
 771         mdi_phci_t      *ph = NULL;
 772 
 773         if (MDI_PHCI(pdip)) {
 774                 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
 775         }
 776         return (ph);
 777 }
 778 
 779 /*
 780  * Single thread mdi entry into devinfo node for modifying its children.
 781  * If necessary we perform an ndi_devi_enter of the vHCI before doing
 782  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 783  * for the vHCI and one for the pHCI.
 784  */
 785 void
 786 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
 787 {
 788         dev_info_t      *vdip;
 789         int             vcircular, pcircular;
 790 
 791         /* Verify calling context */
 792         ASSERT(MDI_PHCI(phci_dip));
 793         vdip = mdi_devi_get_vdip(phci_dip);
 794         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 795 
 796         /*
 797          * If pHCI is detaching then the framework has already entered the
 798          * vHCI on a threads that went down the code path leading to
 799          * detach_node().  This framework enter of the vHCI during pHCI
 800          * detach is done to avoid deadlock with vHCI power management
 801          * operations which enter the vHCI and the enter down the path
 802          * to the pHCI. If pHCI is detaching then we piggyback this calls
 803          * enter of the vHCI on frameworks vHCI enter that has already
 804          * occurred - this is OK because we know that the framework thread
 805          * doing detach is waiting for our completion.
 806          *
 807          * We should DEVI_IS_DETACHING under an enter of the parent to avoid
 808          * race with detach - but we can't do that because the framework has
 809          * already entered the parent, so we have some complexity instead.
 810          */
 811         for (;;) {
 812                 if (ndi_devi_tryenter(vdip, &vcircular)) {
 813                         ASSERT(vcircular != -1);
 814                         if (DEVI_IS_DETACHING(phci_dip)) {
 815                                 ndi_devi_exit(vdip, vcircular);
 816                                 vcircular = -1;
 817                         }
 818                         break;
 819                 } else if (DEVI_IS_DETACHING(phci_dip)) {
 820                         vcircular = -1;
 821                         break;
 822                 } else if (servicing_interrupt()) {
 823                         /*
 824                          * Don't delay an interrupt (and ensure adaptive
 825                          * mutex inversion support).
 826                          */
 827                         ndi_devi_enter(vdip, &vcircular);
 828                         break;
 829                 } else {
 830                         delay_random(mdi_delay);
 831                 }
 832         }
 833 
 834         ndi_devi_enter(phci_dip, &pcircular);
 835         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 836 }
 837 
 838 /*
 839  * Attempt to mdi_devi_enter.
 840  */
 841 int
 842 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
 843 {
 844         dev_info_t      *vdip;
 845         int             vcircular, pcircular;
 846 
 847         /* Verify calling context */
 848         ASSERT(MDI_PHCI(phci_dip));
 849         vdip = mdi_devi_get_vdip(phci_dip);
 850         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 851 
 852         if (ndi_devi_tryenter(vdip, &vcircular)) {
 853                 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
 854                         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 855                         return (1);     /* locked */
 856                 }
 857                 ndi_devi_exit(vdip, vcircular);
 858         }
 859         return (0);                     /* busy */
 860 }
 861 
 862 /*
 863  * Release mdi_devi_enter or successful mdi_devi_tryenter.
 864  */
 865 void
 866 mdi_devi_exit(dev_info_t *phci_dip, int circular)
 867 {
 868         dev_info_t      *vdip;
 869         int             vcircular, pcircular;
 870 
 871         /* Verify calling context */
 872         ASSERT(MDI_PHCI(phci_dip));
 873         vdip = mdi_devi_get_vdip(phci_dip);
 874         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 875 
 876         /* extract two circular recursion values from single int */
 877         pcircular = (short)(circular & 0xFFFF);
 878         vcircular = (short)((circular >> 16) & 0xFFFF);
 879 
 880         ndi_devi_exit(phci_dip, pcircular);
 881         if (vcircular != -1)
 882                 ndi_devi_exit(vdip, vcircular);
 883 }
 884 
 885 /*
 886  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 887  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 888  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 889  * with vHCI power management code during path online/offline.  Each
 890  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 891  * occur within the scope of an active mdi_devi_enter that establishes the
 892  * circular value.
 893  */
 894 void
 895 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
 896 {
 897         int             pcircular;
 898 
 899         /* Verify calling context */
 900         ASSERT(MDI_PHCI(phci_dip));
 901 
 902         /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
 903         ndi_hold_devi(phci_dip);
 904 
 905         pcircular = (short)(circular & 0xFFFF);
 906         ndi_devi_exit(phci_dip, pcircular);
 907 }
 908 
 909 void
 910 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
 911 {
 912         int             pcircular;
 913 
 914         /* Verify calling context */
 915         ASSERT(MDI_PHCI(phci_dip));
 916 
 917         ndi_devi_enter(phci_dip, &pcircular);
 918 
 919         /* Drop hold from mdi_devi_exit_phci. */
 920         ndi_rele_devi(phci_dip);
 921 
 922         /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
 923         ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
 924 }
 925 
 926 /*
 927  * mdi_devi_get_vdip():
 928  *              given a pHCI dip return vHCI dip
 929  */
 930 dev_info_t *
 931 mdi_devi_get_vdip(dev_info_t *pdip)
 932 {
 933         mdi_phci_t      *ph;
 934 
 935         ph = i_devi_get_phci(pdip);
 936         if (ph && ph->ph_vhci)
 937                 return (ph->ph_vhci->vh_dip);
 938         return (NULL);
 939 }
 940 
 941 /*
 942  * mdi_devi_pdip_entered():
 943  *              Return 1 if we are vHCI and have done an ndi_devi_enter
 944  *              of a pHCI
 945  */
 946 int
 947 mdi_devi_pdip_entered(dev_info_t *vdip)
 948 {
 949         mdi_vhci_t      *vh;
 950         mdi_phci_t      *ph;
 951 
 952         vh = i_devi_get_vhci(vdip);
 953         if (vh == NULL)
 954                 return (0);
 955 
 956         MDI_VHCI_PHCI_LOCK(vh);
 957         ph = vh->vh_phci_head;
 958         while (ph) {
 959                 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
 960                         MDI_VHCI_PHCI_UNLOCK(vh);
 961                         return (1);
 962                 }
 963                 ph = ph->ph_next;
 964         }
 965         MDI_VHCI_PHCI_UNLOCK(vh);
 966         return (0);
 967 }
 968 
 969 /*
 970  * mdi_phci_path2devinfo():
 971  *              Utility function to search for a valid phci device given
 972  *              the devfs pathname.
 973  */
 974 dev_info_t *
 975 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
 976 {
 977         char            *temp_pathname;
 978         mdi_vhci_t      *vh;
 979         mdi_phci_t      *ph;
 980         dev_info_t      *pdip = NULL;
 981 
 982         vh = i_devi_get_vhci(vdip);
 983         ASSERT(vh != NULL);
 984 
 985         if (vh == NULL) {
 986                 /*
 987                  * Invalid vHCI component, return failure
 988                  */
 989                 return (NULL);
 990         }
 991 
 992         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 993         MDI_VHCI_PHCI_LOCK(vh);
 994         ph = vh->vh_phci_head;
 995         while (ph != NULL) {
 996                 pdip = ph->ph_dip;
 997                 ASSERT(pdip != NULL);
 998                 *temp_pathname = '\0';
 999                 (void) ddi_pathname(pdip, temp_pathname);
1000                 if (strcmp(temp_pathname, pathname) == 0) {
1001                         break;
1002                 }
1003                 ph = ph->ph_next;
1004         }
1005         if (ph == NULL) {
1006                 pdip = NULL;
1007         }
1008         MDI_VHCI_PHCI_UNLOCK(vh);
1009         kmem_free(temp_pathname, MAXPATHLEN);
1010         return (pdip);
1011 }
1012 
1013 /*
1014  * mdi_phci_get_path_count():
1015  *              get number of path information nodes associated with a given
1016  *              pHCI device.
1017  */
1018 int
1019 mdi_phci_get_path_count(dev_info_t *pdip)
1020 {
1021         mdi_phci_t      *ph;
1022         int             count = 0;
1023 
1024         ph = i_devi_get_phci(pdip);
1025         if (ph != NULL) {
1026                 count = ph->ph_path_count;
1027         }
1028         return (count);
1029 }
1030 
1031 /*
1032  * i_mdi_phci_lock():
1033  *              Lock a pHCI device
1034  * Return Values:
1035  *              None
1036  * Note:
1037  *              The default locking order is:
1038  *              _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1039  *              But there are number of situations where locks need to be
1040  *              grabbed in reverse order.  This routine implements try and lock
1041  *              mechanism depending on the requested parameter option.
1042  */
1043 static void
1044 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1045 {
1046         if (pip) {
1047                 /* Reverse locking is requested. */
1048                 while (MDI_PHCI_TRYLOCK(ph) == 0) {
1049                         if (servicing_interrupt()) {
1050                                 MDI_PI_HOLD(pip);
1051                                 MDI_PI_UNLOCK(pip);
1052                                 MDI_PHCI_LOCK(ph);
1053                                 MDI_PI_LOCK(pip);
1054                                 MDI_PI_RELE(pip);
1055                                 break;
1056                         } else {
1057                                 /*
1058                                  * tryenter failed. Try to grab again
1059                                  * after a small delay
1060                                  */
1061                                 MDI_PI_HOLD(pip);
1062                                 MDI_PI_UNLOCK(pip);
1063                                 delay_random(mdi_delay);
1064                                 MDI_PI_LOCK(pip);
1065                                 MDI_PI_RELE(pip);
1066                         }
1067                 }
1068         } else {
1069                 MDI_PHCI_LOCK(ph);
1070         }
1071 }
1072 
/*
 * i_mdi_phci_unlock():
 *              Unlock the pHCI component.  Thin wrapper around
 *              MDI_PHCI_UNLOCK(); the counterpart of i_mdi_phci_lock().
 * Return Values:
 *              None
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
        MDI_PHCI_UNLOCK(ph);
}
1082 
1083 /*
1084  * i_mdi_devinfo_create():
1085  *              create client device's devinfo node
1086  * Return Values:
1087  *              dev_info
1088  *              NULL
1089  * Notes:
1090  */
1091 static dev_info_t *
1092 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1093         char **compatible, int ncompatible)
1094 {
1095         dev_info_t *cdip = NULL;
1096 
1097         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1098 
1099         /* Verify for duplicate entry */
1100         cdip = i_mdi_devinfo_find(vh, name, guid);
1101         ASSERT(cdip == NULL);
1102         if (cdip) {
1103                 cmn_err(CE_WARN,
1104                     "i_mdi_devinfo_create: client %s@%s already exists",
1105                         name ? name : "", guid ? guid : "");
1106         }
1107 
1108         ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1109         if (cdip == NULL)
1110                 goto fail;
1111 
1112         /*
1113          * Create component type and Global unique identifier
1114          * properties
1115          */
1116         if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1117             MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1118                 goto fail;
1119         }
1120 
1121         /* Decorate the node with compatible property */
1122         if (compatible &&
1123             (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1124             "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1125                 goto fail;
1126         }
1127 
1128         return (cdip);
1129 
1130 fail:
1131         if (cdip) {
1132                 (void) ndi_prop_remove_all(cdip);
1133                 (void) ndi_devi_free(cdip);
1134         }
1135         return (NULL);
1136 }
1137 
1138 /*
1139  * i_mdi_devinfo_find():
1140  *              Find a matching devinfo node for given client node name
1141  *              and its guid.
1142  * Return Values:
1143  *              Handle to a dev_info node or NULL
1144  */
1145 static dev_info_t *
1146 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1147 {
1148         char                    *data;
1149         dev_info_t              *cdip = NULL;
1150         dev_info_t              *ndip = NULL;
1151         int                     circular;
1152 
1153         ndi_devi_enter(vh->vh_dip, &circular);
1154         ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1155         while ((cdip = ndip) != NULL) {
1156                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1157 
1158                 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1159                         continue;
1160                 }
1161 
1162                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1163                     DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1164                     &data) != DDI_PROP_SUCCESS) {
1165                         continue;
1166                 }
1167 
1168                 if (strcmp(data, guid) != 0) {
1169                         ddi_prop_free(data);
1170                         continue;
1171                 }
1172                 ddi_prop_free(data);
1173                 break;
1174         }
1175         ndi_devi_exit(vh->vh_dip, circular);
1176         return (cdip);
1177 }
1178 
1179 /*
1180  * i_mdi_devinfo_remove():
1181  *              Remove a client device node
1182  */
1183 static int
1184 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1185 {
1186         int     rv = MDI_SUCCESS;
1187 
1188         if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1189             (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1190                 int nflags = NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE;
1191 
1192                 if (flags & MDI_CLIENT_FLAGS_NO_EVENT)
1193                         nflags |= NDI_NO_EVENT;
1194 
1195                 rv = ndi_devi_offline(cdip, nflags);
1196                 if (rv != NDI_SUCCESS) {
1197                         MDI_DEBUG(1, (MDI_NOTE, cdip,
1198                             "!failed: cdip %p", (void *)cdip));
1199                 }
1200                 /*
1201                  * Convert to MDI error code
1202                  */
1203                 switch (rv) {
1204                 case NDI_SUCCESS:
1205                         rv = MDI_SUCCESS;
1206                         break;
1207                 case NDI_BUSY:
1208                         rv = MDI_BUSY;
1209                         break;
1210                 default:
1211                         rv = MDI_FAILURE;
1212                         break;
1213                 }
1214         }
1215         return (rv);
1216 }
1217 
1218 /*
1219  * i_devi_get_client()
1220  *              Utility function to get mpxio component extensions
1221  */
1222 static mdi_client_t *
1223 i_devi_get_client(dev_info_t *cdip)
1224 {
1225         mdi_client_t    *ct = NULL;
1226 
1227         if (MDI_CLIENT(cdip)) {
1228                 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1229         }
1230         return (ct);
1231 }
1232 
1233 /*
1234  * i_mdi_is_child_present():
1235  *              Search for the presence of client device dev_info node
1236  */
1237 static int
1238 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1239 {
1240         int             rv = MDI_FAILURE;
1241         struct dev_info *dip;
1242         int             circular;
1243 
1244         ndi_devi_enter(vdip, &circular);
1245         dip = DEVI(vdip)->devi_child;
1246         while (dip) {
1247                 if (dip == DEVI(cdip)) {
1248                         rv = MDI_SUCCESS;
1249                         break;
1250                 }
1251                 dip = dip->devi_sibling;
1252         }
1253         ndi_devi_exit(vdip, circular);
1254         return (rv);
1255 }
1256 
1257 
1258 /*
1259  * i_mdi_client_lock():
1260  *              Grab client component lock
1261  * Return Values:
1262  *              None
1263  * Note:
1264  *              The default locking order is:
1265  *              _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1266  *              But there are number of situations where locks need to be
1267  *              grabbed in reverse order.  This routine implements try and lock
1268  *              mechanism depending on the requested parameter option.
1269  */
1270 static void
1271 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1272 {
1273         if (pip) {
1274                 /*
1275                  * Reverse locking is requested.
1276                  */
1277                 while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1278                         if (servicing_interrupt()) {
1279                                 MDI_PI_HOLD(pip);
1280                                 MDI_PI_UNLOCK(pip);
1281                                 MDI_CLIENT_LOCK(ct);
1282                                 MDI_PI_LOCK(pip);
1283                                 MDI_PI_RELE(pip);
1284                                 break;
1285                         } else {
1286                                 /*
1287                                  * tryenter failed. Try to grab again
1288                                  * after a small delay
1289                                  */
1290                                 MDI_PI_HOLD(pip);
1291                                 MDI_PI_UNLOCK(pip);
1292                                 delay_random(mdi_delay);
1293                                 MDI_PI_LOCK(pip);
1294                                 MDI_PI_RELE(pip);
1295                         }
1296                 }
1297         } else {
1298                 MDI_CLIENT_LOCK(ct);
1299         }
1300 }
1301 
/*
 * i_mdi_client_unlock():
 *              Unlock a client component.  Thin wrapper around
 *              MDI_CLIENT_UNLOCK(); the counterpart of i_mdi_client_lock().
 * Return Values:
 *              None
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
        MDI_CLIENT_UNLOCK(ct);
}
1311 
1312 /*
1313  * i_mdi_client_alloc():
1314  *              Allocate and initialize a client structure.  Caller should
1315  *              hold the vhci client lock.
1316  * Return Values:
1317  *              Handle to a client component
1318  */
1319 /*ARGSUSED*/
1320 static mdi_client_t *
1321 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1322 {
1323         mdi_client_t    *ct;
1324 
1325         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1326 
1327         /*
1328          * Allocate and initialize a component structure.
1329          */
1330         ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1331         mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1332         ct->ct_hnext = NULL;
1333         ct->ct_hprev = NULL;
1334         ct->ct_dip = NULL;
1335         ct->ct_vhci = vh;
1336         ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1337         (void) strcpy(ct->ct_drvname, name);
1338         ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1339         (void) strcpy(ct->ct_guid, lguid);
1340         ct->ct_cprivate = NULL;
1341         ct->ct_vprivate = NULL;
1342         ct->ct_flags = 0;
1343         ct->ct_state = MDI_CLIENT_STATE_FAILED;
1344         MDI_CLIENT_LOCK(ct);
1345         MDI_CLIENT_SET_OFFLINE(ct);
1346         MDI_CLIENT_SET_DETACH(ct);
1347         MDI_CLIENT_SET_POWER_UP(ct);
1348         MDI_CLIENT_UNLOCK(ct);
1349         ct->ct_failover_flags = 0;
1350         ct->ct_failover_status = 0;
1351         cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1352         ct->ct_unstable = 0;
1353         cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1354         cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1355         ct->ct_lb = vh->vh_lb;
1356         ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1357         ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1358         ct->ct_path_count = 0;
1359         ct->ct_path_head = NULL;
1360         ct->ct_path_tail = NULL;
1361         ct->ct_path_last = NULL;
1362 
1363         /*
1364          * Add this client component to our client hash queue
1365          */
1366         i_mdi_client_enlist_table(vh, ct);
1367         return (ct);
1368 }
1369 
1370 /*
1371  * i_mdi_client_enlist_table():
1372  *              Attach the client device to the client hash table. Caller
1373  *              should hold the vhci client lock.
1374  */
1375 static void
1376 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1377 {
1378         int                     index;
1379         struct client_hash      *head;
1380 
1381         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1382 
1383         index = i_mdi_get_hash_key(ct->ct_guid);
1384         head = &vh->vh_client_table[index];
1385         ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1386         head->ct_hash_head = ct;
1387         head->ct_hash_count++;
1388         vh->vh_client_count++;
1389 }
1390 
1391 /*
1392  * i_mdi_client_delist_table():
1393  *              Attach the client device to the client hash table.
1394  *              Caller should hold the vhci client lock.
1395  */
1396 static void
1397 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1398 {
1399         int                     index;
1400         char                    *guid;
1401         struct client_hash      *head;
1402         mdi_client_t            *next;
1403         mdi_client_t            *last;
1404 
1405         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1406 
1407         guid = ct->ct_guid;
1408         index = i_mdi_get_hash_key(guid);
1409         head = &vh->vh_client_table[index];
1410 
1411         last = NULL;
1412         next = (mdi_client_t *)head->ct_hash_head;
1413         while (next != NULL) {
1414                 if (next == ct) {
1415                         break;
1416                 }
1417                 last = next;
1418                 next = next->ct_hnext;
1419         }
1420 
1421         if (next) {
1422                 head->ct_hash_count--;
1423                 if (last == NULL) {
1424                         head->ct_hash_head = ct->ct_hnext;
1425                 } else {
1426                         last->ct_hnext = ct->ct_hnext;
1427                 }
1428                 ct->ct_hnext = NULL;
1429                 vh->vh_client_count--;
1430         }
1431 }
1432 
1433 
1434 /*
1435  * i_mdi_client_free():
1436  *              Free a client component
1437  */
1438 static int
1439 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1440 {
1441         int             rv = MDI_SUCCESS;
1442         int             flags = ct->ct_flags;
1443         dev_info_t      *cdip;
1444         dev_info_t      *vdip;
1445 
1446         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1447 
1448         vdip = vh->vh_dip;
1449         cdip = ct->ct_dip;
1450 
1451         (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1452         DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1453         DEVI(cdip)->devi_mdi_client = NULL;
1454 
1455         /*
1456          * Clear out back ref. to dev_info_t node
1457          */
1458         ct->ct_dip = NULL;
1459 
1460         /*
1461          * Remove this client from our hash queue
1462          */
1463         i_mdi_client_delist_table(vh, ct);
1464 
1465         /*
1466          * Uninitialize and free the component
1467          */
1468         kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1469         kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1470         kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1471         cv_destroy(&ct->ct_failover_cv);
1472         cv_destroy(&ct->ct_unstable_cv);
1473         cv_destroy(&ct->ct_powerchange_cv);
1474         mutex_destroy(&ct->ct_mutex);
1475         kmem_free(ct, sizeof (*ct));
1476 
1477         if (cdip != NULL) {
1478                 MDI_VHCI_CLIENT_UNLOCK(vh);
1479                 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1480                 MDI_VHCI_CLIENT_LOCK(vh);
1481         }
1482         return (rv);
1483 }
1484 
1485 /*
1486  * i_mdi_client_find():
1487  *              Find the client structure corresponding to a given guid
1488  *              Caller should hold the vhci client lock.
1489  */
1490 static mdi_client_t *
1491 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1492 {
1493         int                     index;
1494         struct client_hash      *head;
1495         mdi_client_t            *ct;
1496 
1497         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1498 
1499         index = i_mdi_get_hash_key(guid);
1500         head = &vh->vh_client_table[index];
1501 
1502         ct = head->ct_hash_head;
1503         while (ct != NULL) {
1504                 if (strcmp(ct->ct_guid, guid) == 0 &&
1505                     (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1506                         break;
1507                 }
1508                 ct = ct->ct_hnext;
1509         }
1510         return (ct);
1511 }
1512 
1513 /*
1514  * i_mdi_client_update_state():
1515  *              Compute and update client device state
1516  * Notes:
1517  *              A client device can be in any of three possible states:
1518  *
1519  *              MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1520  *              one online/standby paths. Can tolerate failures.
1521  *              MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1522  *              no alternate paths available as standby. A failure on the online
1523  *              would result in loss of access to device data.
1524  *              MDI_CLIENT_STATE_FAILED - Client device in failed state with
1525  *              no paths available to access the device.
1526  */
1527 static void
1528 i_mdi_client_update_state(mdi_client_t *ct)
1529 {
1530         int state;
1531 
1532         ASSERT(MDI_CLIENT_LOCKED(ct));
1533         state = i_mdi_client_compute_state(ct, NULL);
1534         MDI_CLIENT_SET_STATE(ct, state);
1535 }
1536 
1537 /*
1538  * i_mdi_client_compute_state():
1539  *              Compute client device state
1540  *
1541  *              mdi_phci_t *    Pointer to pHCI structure which should
1542  *                              while computing the new value.  Used by
1543  *                              i_mdi_phci_offline() to find the new
1544  *                              client state after DR of a pHCI.
1545  */
1546 static int
1547 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1548 {
1549         int             state;
1550         int             online_count = 0;
1551         int             standby_count = 0;
1552         mdi_pathinfo_t  *pip, *next;
1553 
1554         ASSERT(MDI_CLIENT_LOCKED(ct));
1555         pip = ct->ct_path_head;
1556         while (pip != NULL) {
1557                 MDI_PI_LOCK(pip);
1558                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1559                 if (MDI_PI(pip)->pi_phci == ph) {
1560                         MDI_PI_UNLOCK(pip);
1561                         pip = next;
1562                         continue;
1563                 }
1564 
1565                 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1566                                 == MDI_PATHINFO_STATE_ONLINE)
1567                         online_count++;
1568                 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1569                                 == MDI_PATHINFO_STATE_STANDBY)
1570                         standby_count++;
1571                 MDI_PI_UNLOCK(pip);
1572                 pip = next;
1573         }
1574 
1575         if (online_count == 0) {
1576                 if (standby_count == 0) {
1577                         state = MDI_CLIENT_STATE_FAILED;
1578                         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1579                             "client state failed: ct = %p", (void *)ct));
1580                 } else if (standby_count == 1) {
1581                         state = MDI_CLIENT_STATE_DEGRADED;
1582                 } else {
1583                         state = MDI_CLIENT_STATE_OPTIMAL;
1584                 }
1585         } else if (online_count == 1) {
1586                 if (standby_count == 0) {
1587                         state = MDI_CLIENT_STATE_DEGRADED;
1588                 } else {
1589                         state = MDI_CLIENT_STATE_OPTIMAL;
1590                 }
1591         } else {
1592                 state = MDI_CLIENT_STATE_OPTIMAL;
1593         }
1594         return (state);
1595 }
1596 
/*
 * i_mdi_client2devinfo():
 *              Utility function: return the devinfo node recorded in a
 *              client structure (ct_dip).  ct is dereferenced without a
 *              NULL check and no lock is taken here.
 * Return Values:
 *              The client's ct_dip value
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
        return (ct->ct_dip);
}
1606 
1607 /*
1608  * mdi_client_path2_devinfo():
1609  *              Given the parent devinfo and child devfs pathname, search for
1610  *              a valid devfs node handle.
1611  */
1612 dev_info_t *
1613 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1614 {
1615         dev_info_t      *cdip = NULL;
1616         dev_info_t      *ndip = NULL;
1617         char            *temp_pathname;
1618         int             circular;
1619 
1620         /*
1621          * Allocate temp buffer
1622          */
1623         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1624 
1625         /*
1626          * Lock parent against changes
1627          */
1628         ndi_devi_enter(vdip, &circular);
1629         ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1630         while ((cdip = ndip) != NULL) {
1631                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1632 
1633                 *temp_pathname = '\0';
1634                 (void) ddi_pathname(cdip, temp_pathname);
1635                 if (strcmp(temp_pathname, pathname) == 0) {
1636                         break;
1637                 }
1638         }
1639         /*
1640          * Release devinfo lock
1641          */
1642         ndi_devi_exit(vdip, circular);
1643 
1644         /*
1645          * Free the temp buffer
1646          */
1647         kmem_free(temp_pathname, MAXPATHLEN);
1648         return (cdip);
1649 }
1650 
1651 /*
1652  * mdi_client_get_path_count():
1653  *              Utility function to get number of path information nodes
1654  *              associated with a given client device.
1655  */
1656 int
1657 mdi_client_get_path_count(dev_info_t *cdip)
1658 {
1659         mdi_client_t    *ct;
1660         int             count = 0;
1661 
1662         ct = i_devi_get_client(cdip);
1663         if (ct != NULL) {
1664                 count = ct->ct_path_count;
1665         }
1666         return (count);
1667 }
1668 
1669 
1670 /*
1671  * i_mdi_get_hash_key():
1672  *              Create a hash using strings as keys
1673  *
1674  */
1675 static int
1676 i_mdi_get_hash_key(char *str)
1677 {
1678         uint32_t        g, hash = 0;
1679         char            *p;
1680 
1681         for (p = str; *p != '\0'; p++) {
1682                 g = *p;
1683                 hash += g;
1684         }
1685         return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1686 }
1687 
1688 /*
1689  * mdi_get_lb_policy():
1690  *              Get current load balancing policy for a given client device
1691  */
1692 client_lb_t
1693 mdi_get_lb_policy(dev_info_t *cdip)
1694 {
1695         client_lb_t     lb = LOAD_BALANCE_NONE;
1696         mdi_client_t    *ct;
1697 
1698         ct = i_devi_get_client(cdip);
1699         if (ct != NULL) {
1700                 lb = ct->ct_lb;
1701         }
1702         return (lb);
1703 }
1704 
1705 /*
1706  * mdi_set_lb_region_size():
1707  *              Set current region size for the load-balance
1708  */
1709 int
1710 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1711 {
1712         mdi_client_t    *ct;
1713         int             rv = MDI_FAILURE;
1714 
1715         ct = i_devi_get_client(cdip);
1716         if (ct != NULL && ct->ct_lb_args != NULL) {
1717                 ct->ct_lb_args->region_size = region_size;
1718                 rv = MDI_SUCCESS;
1719         }
1720         return (rv);
1721 }
1722 
1723 /*
1724  * mdi_Set_lb_policy():
1725  *              Set current load balancing policy for a given client device
1726  */
1727 int
1728 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1729 {
1730         mdi_client_t    *ct;
1731         int             rv = MDI_FAILURE;
1732 
1733         ct = i_devi_get_client(cdip);
1734         if (ct != NULL) {
1735                 ct->ct_lb = lb;
1736                 rv = MDI_SUCCESS;
1737         }
1738         return (rv);
1739 }
1740 
1741 /*
1742  * mdi_failover():
1743  *              failover function called by the vHCI drivers to initiate
1744  *              a failover operation.  This is typically due to non-availability
1745  *              of online paths to route I/O requests.  Failover can be
1746  *              triggered through user application also.
1747  *
1748  *              The vHCI driver calls mdi_failover() to initiate a failover
1749  *              operation. mdi_failover() calls back into the vHCI driver's
1750  *              vo_failover() entry point to perform the actual failover
1751  *              operation.  The reason for requiring the vHCI driver to
1752  *              initiate failover by calling mdi_failover(), instead of directly
1753  *              executing vo_failover() itself, is to ensure that the mdi
1754  *              framework can keep track of the client state properly.
1755  *              Additionally, mdi_failover() provides as a convenience the
1756  *              option of performing the failover operation synchronously or
1757  *              asynchronously
1758  *
1759  *              Upon successful completion of the failover operation, the
1760  *              paths that were previously ONLINE will be in the STANDBY state,
1761  *              and the newly activated paths will be in the ONLINE state.
1762  *
1763  *              The flags modifier determines whether the activation is done
1764  *              synchronously: MDI_FAILOVER_SYNC
1765  * Return Values:
1766  *              MDI_SUCCESS
1767  *              MDI_FAILURE
1768  *              MDI_BUSY
1769  */
1770 /*ARGSUSED*/
1771 int
1772 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1773 {
1774         int                     rv;
1775         mdi_client_t            *ct;
1776 
1777         ct = i_devi_get_client(cdip);
1778         ASSERT(ct != NULL);
1779         if (ct == NULL) {
1780                 /* cdip is not a valid client device. Nothing more to do. */
1781                 return (MDI_FAILURE);
1782         }
1783 
1784         MDI_CLIENT_LOCK(ct);
1785 
1786         if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1787                 /* A path to the client is being freed */
1788                 MDI_CLIENT_UNLOCK(ct);
1789                 return (MDI_BUSY);
1790         }
1791 
1792 
1793         if (MDI_CLIENT_IS_FAILED(ct)) {
1794                 /*
1795                  * Client is in failed state. Nothing more to do.
1796                  */
1797                 MDI_CLIENT_UNLOCK(ct);
1798                 return (MDI_FAILURE);
1799         }
1800 
1801         if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1802                 /*
1803                  * Failover is already in progress; return BUSY
1804                  */
1805                 MDI_CLIENT_UNLOCK(ct);
1806                 return (MDI_BUSY);
1807         }
1808         /*
1809          * Make sure that mdi_pathinfo node state changes are processed.
1810          * We do not allow failovers to progress while client path state
1811          * changes are in progress
1812          */
1813         if (ct->ct_unstable) {
1814                 if (flags == MDI_FAILOVER_ASYNC) {
1815                         MDI_CLIENT_UNLOCK(ct);
1816                         return (MDI_BUSY);
1817                 } else {
1818                         while (ct->ct_unstable)
1819                                 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1820                 }
1821         }
1822 
1823         /*
1824          * Client device is in stable state. Before proceeding, perform sanity
1825          * checks again.
1826          */
1827         if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1828             (!i_ddi_devi_attached(cdip))) {
1829                 /*
1830                  * Client is in failed state. Nothing more to do.
1831                  */
1832                 MDI_CLIENT_UNLOCK(ct);
1833                 return (MDI_FAILURE);
1834         }
1835 
1836         /*
1837          * Set the client state as failover in progress.
1838          */
1839         MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1840         ct->ct_failover_flags = flags;
1841         MDI_CLIENT_UNLOCK(ct);
1842 
1843         if (flags == MDI_FAILOVER_ASYNC) {
1844                 /*
1845                  * Submit the initiate failover request via CPR safe
1846                  * taskq threads.
1847                  */
1848                 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1849                     ct, KM_SLEEP);
1850                 return (MDI_ACCEPT);
1851         } else {
1852                 /*
1853                  * Synchronous failover mode.  Typically invoked from the user
1854                  * land.
1855                  */
1856                 rv = i_mdi_failover(ct);
1857         }
1858         return (rv);
1859 }
1860 
1861 /*
1862  * i_mdi_failover():
1863  *              internal failover function. Invokes vHCI drivers failover
1864  *              callback function and process the failover status
1865  * Return Values:
1866  *              None
1867  *
1868  * Note: A client device in failover state can not be detached or freed.
1869  */
1870 static int
1871 i_mdi_failover(void *arg)
1872 {
1873         int             rv = MDI_SUCCESS;
1874         mdi_client_t    *ct = (mdi_client_t *)arg;
1875         mdi_vhci_t      *vh = ct->ct_vhci;
1876 
1877         ASSERT(!MDI_CLIENT_LOCKED(ct));
1878 
1879         if (vh->vh_ops->vo_failover != NULL) {
1880                 /*
1881                  * Call vHCI drivers callback routine
1882                  */
1883                 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1884                     ct->ct_failover_flags);
1885         }
1886 
1887         MDI_CLIENT_LOCK(ct);
1888         MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1889 
1890         /*
1891          * Save the failover return status
1892          */
1893         ct->ct_failover_status = rv;
1894 
1895         /*
1896          * As a result of failover, client status would have been changed.
1897          * Update the client state and wake up anyone waiting on this client
1898          * device.
1899          */
1900         i_mdi_client_update_state(ct);
1901 
1902         cv_broadcast(&ct->ct_failover_cv);
1903         MDI_CLIENT_UNLOCK(ct);
1904         return (rv);
1905 }
1906 
1907 /*
1908  * Load balancing is logical block.
1909  * IOs within the range described by region_size
1910  * would go on the same path. This would improve the
1911  * performance by cache-hit on some of the RAID devices.
1912  * Search only for online paths(At some point we
1913  * may want to balance across target ports).
1914  * If no paths are found then default to round-robin.
1915  */
1916 static int
1917 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1918 {
1919         int             path_index = -1;
1920         int             online_path_count = 0;
1921         int             online_nonpref_path_count = 0;
1922         int             region_size = ct->ct_lb_args->region_size;
1923         mdi_pathinfo_t  *pip;
1924         mdi_pathinfo_t  *next;
1925         int             preferred, path_cnt;
1926 
1927         pip = ct->ct_path_head;
1928         while (pip) {
1929                 MDI_PI_LOCK(pip);
1930                 if (MDI_PI(pip)->pi_state ==
1931                     MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1932                         online_path_count++;
1933                 } else if (MDI_PI(pip)->pi_state ==
1934                     MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1935                         online_nonpref_path_count++;
1936                 }
1937                 next = (mdi_pathinfo_t *)
1938                     MDI_PI(pip)->pi_client_link;
1939                 MDI_PI_UNLOCK(pip);
1940                 pip = next;
1941         }
1942         /* if found any online/preferred then use this type */
1943         if (online_path_count > 0) {
1944                 path_cnt = online_path_count;
1945                 preferred = 1;
1946         } else if (online_nonpref_path_count > 0) {
1947                 path_cnt = online_nonpref_path_count;
1948                 preferred = 0;
1949         } else {
1950                 path_cnt = 0;
1951         }
1952         if (path_cnt) {
1953                 path_index = (bp->b_blkno >> region_size) % path_cnt;
1954                 pip = ct->ct_path_head;
1955                 while (pip && path_index != -1) {
1956                         MDI_PI_LOCK(pip);
1957                         if (path_index == 0 &&
1958                             (MDI_PI(pip)->pi_state ==
1959                             MDI_PATHINFO_STATE_ONLINE) &&
1960                                 MDI_PI(pip)->pi_preferred == preferred) {
1961                                 MDI_PI_HOLD(pip);
1962                                 MDI_PI_UNLOCK(pip);
1963                                 *ret_pip = pip;
1964                                 return (MDI_SUCCESS);
1965                         }
1966                         path_index --;
1967                         next = (mdi_pathinfo_t *)
1968                             MDI_PI(pip)->pi_client_link;
1969                         MDI_PI_UNLOCK(pip);
1970                         pip = next;
1971                 }
1972                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1973                     "lba %llx: path %s %p",
1974                     bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1975         }
1976         return (MDI_FAILURE);
1977 }
1978 
1979 /*
1980  * mdi_select_path():
1981  *              select a path to access a client device.
1982  *
1983  *              mdi_select_path() function is called by the vHCI drivers to
1984  *              select a path to route the I/O request to.  The caller passes
1985  *              the block I/O data transfer structure ("buf") as one of the
1986  *              parameters.  The mpxio framework uses the buf structure
1987  *              contents to maintain per path statistics (total I/O size /
1988  *              count pending).  If more than one online paths are available to
1989  *              select, the framework automatically selects a suitable path
1990  *              for routing I/O request. If a failover operation is active for
1991  *              this client device the call shall be failed with MDI_BUSY error
1992  *              code.
1993  *
1994  *              By default this function returns a suitable path in online
 *              state based on the current load balancing policy.  Currently
 *              we support LOAD_BALANCE_NONE (the previously selected online
 *              path continues to be used for as long as it is usable),
 *              LOAD_BALANCE_RR (online paths are selected in a round
 *              robin fashion) and LOAD_BALANCE_LBA (online paths are
 *              selected based on the logical block).  The load balancing
 *              policy is set through the vHCI driver's configuration file
 *              (driver.conf).
2002  *
2003  *              vHCI drivers may override this default behavior by specifying
 *              appropriate flags.  The meaning of the third argument depends
2005  *              on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2006  *              then the argument is the "path instance" of the path to select.
2007  *              If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2008  *              "start_pip". A non NULL "start_pip" is the starting point to
2009  *              walk and find the next appropriate path.  The following values
2010  *              are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2011  *              ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2012  *              STANDBY path).
2013  *
2014  *              The non-standard behavior is used by the scsi_vhci driver,
2015  *              whenever it has to use a STANDBY/FAULTED path.  Eg. during
2016  *              attach of client devices (to avoid an unnecessary failover
2017  *              when the STANDBY path comes up first), during failover
2018  *              (to activate a STANDBY path as ONLINE).
2019  *
 *              The selected path is returned in a mdi_hold_path() state
2021  *              (pi_ref_cnt). Caller should release the hold by calling
2022  *              mdi_rele_path().
2023  *
2024  * Return Values:
2025  *              MDI_SUCCESS     - Completed successfully
2026  *              MDI_BUSY        - Client device is busy failing over
2027  *              MDI_NOPATH      - Client device is online, but no valid path are
2028  *                                available to access this client device
2029  *              MDI_FAILURE     - Invalid client device or state
2030  *              MDI_DEVI_ONLINING
2031  *                              - Client device (struct dev_info state) is in
2032  *                                onlining state.
2033  */
2034 
2035 /*ARGSUSED*/
2036 int
2037 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2038     void *arg, mdi_pathinfo_t **ret_pip)
2039 {
2040         mdi_client_t    *ct;
2041         mdi_pathinfo_t  *pip;
2042         mdi_pathinfo_t  *next;
2043         mdi_pathinfo_t  *head;
2044         mdi_pathinfo_t  *start;
2045         client_lb_t     lbp;    /* load balancing policy */
2046         int             sb = 1; /* standard behavior */
2047         int             preferred = 1;  /* preferred path */
2048         int             cond, cont = 1;
2049         int             retry = 0;
2050         mdi_pathinfo_t  *start_pip;     /* request starting pathinfo */
2051         int             path_instance;  /* request specific path instance */
2052 
2053         /* determine type of arg based on flags */
2054         if (flags & MDI_SELECT_PATH_INSTANCE) {
2055                 path_instance = (int)(intptr_t)arg;
2056                 start_pip = NULL;
2057         } else {
2058                 path_instance = 0;
2059                 start_pip = (mdi_pathinfo_t *)arg;
2060         }
2061 
2062         if (flags != 0) {
2063                 /*
2064                  * disable default behavior
2065                  */
2066                 sb = 0;
2067         }
2068 
2069         *ret_pip = NULL;
2070         ct = i_devi_get_client(cdip);
2071         if (ct == NULL) {
2072                 /* mdi extensions are NULL, Nothing more to do */
2073                 return (MDI_FAILURE);
2074         }
2075 
2076         MDI_CLIENT_LOCK(ct);
2077 
2078         if (sb) {
2079                 if (MDI_CLIENT_IS_FAILED(ct)) {
2080                         /*
2081                          * Client is not ready to accept any I/O requests.
2082                          * Fail this request.
2083                          */
2084                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2085                             "client state offline ct = %p", (void *)ct));
2086                         MDI_CLIENT_UNLOCK(ct);
2087                         return (MDI_FAILURE);
2088                 }
2089 
2090                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2091                         /*
2092                          * Check for Failover is in progress. If so tell the
2093                          * caller that this device is busy.
2094                          */
2095                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2096                             "client failover in progress ct = %p",
2097                             (void *)ct));
2098                         MDI_CLIENT_UNLOCK(ct);
2099                         return (MDI_BUSY);
2100                 }
2101 
2102                 /*
2103                  * Check to see whether the client device is attached.
2104                  * If not so, let the vHCI driver manually select a path
2105                  * (standby) and let the probe/attach process to continue.
2106                  */
2107                 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2108                         MDI_DEBUG(4, (MDI_NOTE, cdip,
2109                             "devi is onlining ct = %p", (void *)ct));
2110                         MDI_CLIENT_UNLOCK(ct);
2111                         return (MDI_DEVI_ONLINING);
2112                 }
2113         }
2114 
2115         /*
2116          * Cache in the client list head.  If head of the list is NULL
2117          * return MDI_NOPATH
2118          */
2119         head = ct->ct_path_head;
2120         if (head == NULL) {
2121                 MDI_CLIENT_UNLOCK(ct);
2122                 return (MDI_NOPATH);
2123         }
2124 
2125         /* Caller is specifying a specific pathinfo path by path_instance */
2126         if (path_instance) {
2127                 /* search for pathinfo with correct path_instance */
2128                 for (pip = head;
2129                     pip && (mdi_pi_get_path_instance(pip) != path_instance);
2130                     pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2131                         ;
2132 
2133                 /* If path can't be selected then MDI_NOPATH is returned. */
2134                 if (pip == NULL) {
2135                         MDI_CLIENT_UNLOCK(ct);
2136                         return (MDI_NOPATH);
2137                 }
2138 
2139                 /*
2140                  * Verify state of path. When asked to select a specific
2141                  * path_instance, we select the requested path in any
2142                  * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2143                  * We don't however select paths where the pHCI has detached.
2144                  * NOTE: last pathinfo node of an opened client device may
2145                  * exist in an OFFLINE state after the pHCI associated with
2146                  * that path has detached (but pi_phci will be NULL if that
2147                  * has occurred).
2148                  */
2149                 MDI_PI_LOCK(pip);
2150                 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2151                     (MDI_PI(pip)->pi_phci == NULL)) {
2152                         MDI_PI_UNLOCK(pip);
2153                         MDI_CLIENT_UNLOCK(ct);
2154                         return (MDI_FAILURE);
2155                 }
2156 
2157                 /* Return MDI_BUSY if we have a transient condition */
2158                 if (MDI_PI_IS_TRANSIENT(pip)) {
2159                         MDI_PI_UNLOCK(pip);
2160                         MDI_CLIENT_UNLOCK(ct);
2161                         return (MDI_BUSY);
2162                 }
2163 
2164                 /*
2165                  * Return the path in hold state. Caller should release the
2166                  * lock by calling mdi_rele_path()
2167                  */
2168                 MDI_PI_HOLD(pip);
2169                 MDI_PI_UNLOCK(pip);
2170                 *ret_pip = pip;
2171                 MDI_CLIENT_UNLOCK(ct);
2172                 return (MDI_SUCCESS);
2173         }
2174 
2175         /*
2176          * for non default behavior, bypass current
2177          * load balancing policy and always use LOAD_BALANCE_RR
2178          * except that the start point will be adjusted based
2179          * on the provided start_pip
2180          */
2181         lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2182 
2183         switch (lbp) {
2184         case LOAD_BALANCE_NONE:
2185                 /*
2186                  * Load balancing is None  or Alternate path mode
2187                  * Start looking for a online mdi_pathinfo node starting from
2188                  * last known selected path
2189                  */
2190                 preferred = 1;
2191                 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2192                 if (pip == NULL) {
2193                         pip = head;
2194                 }
2195                 start = pip;
2196                 do {
2197                         MDI_PI_LOCK(pip);
2198                         /*
2199                          * No need to explicitly check if the path is disabled.
2200                          * Since we are checking for state == ONLINE and the
2201                          * same variable is used for DISABLE/ENABLE information.
2202                          */
2203                         if ((MDI_PI(pip)->pi_state  ==
2204                                 MDI_PATHINFO_STATE_ONLINE) &&
2205                                 preferred == MDI_PI(pip)->pi_preferred) {
2206                                 /*
2207                                  * Return the path in hold state. Caller should
2208                                  * release the lock by calling mdi_rele_path()
2209                                  */
2210                                 MDI_PI_HOLD(pip);
2211                                 MDI_PI_UNLOCK(pip);
2212                                 ct->ct_path_last = pip;
2213                                 *ret_pip = pip;
2214                                 MDI_CLIENT_UNLOCK(ct);
2215                                 return (MDI_SUCCESS);
2216                         }
2217 
2218                         /*
2219                          * Path is busy.
2220                          */
2221                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2222                             MDI_PI_IS_TRANSIENT(pip))
2223                                 retry = 1;
2224                         /*
2225                          * Keep looking for a next available online path
2226                          */
2227                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2228                         if (next == NULL) {
2229                                 next = head;
2230                         }
2231                         MDI_PI_UNLOCK(pip);
2232                         pip = next;
2233                         if (start == pip && preferred) {
2234                                 preferred = 0;
2235                         } else if (start == pip && !preferred) {
2236                                 cont = 0;
2237                         }
2238                 } while (cont);
2239                 break;
2240 
2241         case LOAD_BALANCE_LBA:
2242                 /*
2243                  * Make sure we are looking
2244                  * for an online path. Otherwise, if it is for a STANDBY
2245                  * path request, it will go through and fetch an ONLINE
2246                  * path which is not desirable.
2247                  */
2248                 if ((ct->ct_lb_args != NULL) &&
2249                             (ct->ct_lb_args->region_size) && bp &&
2250                                 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2251                         if (i_mdi_lba_lb(ct, ret_pip, bp)
2252                                     == MDI_SUCCESS) {
2253                                 MDI_CLIENT_UNLOCK(ct);
2254                                 return (MDI_SUCCESS);
2255                         }
2256                 }
2257                 /* FALLTHROUGH */
2258         case LOAD_BALANCE_RR:
2259                 /*
2260                  * Load balancing is Round Robin. Start looking for a online
2261                  * mdi_pathinfo node starting from last known selected path
2262                  * as the start point.  If override flags are specified,
2263                  * process accordingly.
2264                  * If the search is already in effect(start_pip not null),
2265                  * then lets just use the same path preference to continue the
2266                  * traversal.
2267                  */
2268 
2269                 if (start_pip != NULL) {
2270                         preferred = MDI_PI(start_pip)->pi_preferred;
2271                 } else {
2272                         preferred = 1;
2273                 }
2274 
2275                 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2276                 if (start == NULL) {
2277                         pip = head;
2278                 } else {
2279                         pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2280                         if (pip == NULL) {
2281                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2282                                         /*
2283                                          * Return since we hit the end of list
2284                                          */
2285                                         MDI_CLIENT_UNLOCK(ct);
2286                                         return (MDI_NOPATH);
2287                                 }
2288 
2289                                 if (!sb) {
2290                                         if (preferred == 0) {
2291                                                 /*
2292                                                  * Looks like we have completed
2293                                                  * the traversal as preferred
2294                                                  * value is 0. Time to bail out.
2295                                                  */
2296                                                 *ret_pip = NULL;
2297                                                 MDI_CLIENT_UNLOCK(ct);
2298                                                 return (MDI_NOPATH);
2299                                         } else {
2300                                                 /*
2301                                                  * Looks like we reached the
2302                                                  * end of the list. Lets enable
2303                                                  * traversal of non preferred
2304                                                  * paths.
2305                                                  */
2306                                                 preferred = 0;
2307                                         }
2308                                 }
2309                                 pip = head;
2310                         }
2311                 }
2312                 start = pip;
2313                 do {
2314                         MDI_PI_LOCK(pip);
2315                         if (sb) {
2316                                 cond = ((MDI_PI(pip)->pi_state ==
2317                                     MDI_PATHINFO_STATE_ONLINE &&
2318                                         MDI_PI(pip)->pi_preferred ==
2319                                                 preferred) ? 1 : 0);
2320                         } else {
2321                                 if (flags == MDI_SELECT_ONLINE_PATH) {
2322                                         cond = ((MDI_PI(pip)->pi_state ==
2323                                             MDI_PATHINFO_STATE_ONLINE &&
2324                                                 MDI_PI(pip)->pi_preferred ==
2325                                                 preferred) ? 1 : 0);
2326                                 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2327                                         cond = ((MDI_PI(pip)->pi_state ==
2328                                             MDI_PATHINFO_STATE_STANDBY &&
2329                                                 MDI_PI(pip)->pi_preferred ==
2330                                                 preferred) ? 1 : 0);
2331                                 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2332                                     MDI_SELECT_STANDBY_PATH)) {
2333                                         cond = (((MDI_PI(pip)->pi_state ==
2334                                             MDI_PATHINFO_STATE_ONLINE ||
2335                                             (MDI_PI(pip)->pi_state ==
2336                                             MDI_PATHINFO_STATE_STANDBY)) &&
2337                                                 MDI_PI(pip)->pi_preferred ==
2338                                                 preferred) ? 1 : 0);
2339                                 } else if (flags ==
2340                                         (MDI_SELECT_STANDBY_PATH |
2341                                         MDI_SELECT_ONLINE_PATH |
2342                                         MDI_SELECT_USER_DISABLE_PATH)) {
2343                                         cond = (((MDI_PI(pip)->pi_state ==
2344                                             MDI_PATHINFO_STATE_ONLINE ||
2345                                             (MDI_PI(pip)->pi_state ==
2346                                             MDI_PATHINFO_STATE_STANDBY) ||
2347                                                 (MDI_PI(pip)->pi_state ==
2348                                             (MDI_PATHINFO_STATE_ONLINE|
2349                                             MDI_PATHINFO_STATE_USER_DISABLE)) ||
2350                                                 (MDI_PI(pip)->pi_state ==
2351                                             (MDI_PATHINFO_STATE_STANDBY |
2352                                             MDI_PATHINFO_STATE_USER_DISABLE)))&&
2353                                                 MDI_PI(pip)->pi_preferred ==
2354                                                 preferred) ? 1 : 0);
2355                                 } else if (flags ==
2356                                     (MDI_SELECT_STANDBY_PATH |
2357                                     MDI_SELECT_ONLINE_PATH |
2358                                     MDI_SELECT_NO_PREFERRED)) {
2359                                         cond = (((MDI_PI(pip)->pi_state ==
2360                                             MDI_PATHINFO_STATE_ONLINE) ||
2361                                             (MDI_PI(pip)->pi_state ==
2362                                             MDI_PATHINFO_STATE_STANDBY))
2363                                             ? 1 : 0);
2364                                 } else {
2365                                         cond = 0;
2366                                 }
2367                         }
2368                         /*
2369                          * No need to explicitly check if the path is disabled.
2370                          * Since we are checking for state == ONLINE and the
2371                          * same variable is used for DISABLE/ENABLE information.
2372                          */
2373                         if (cond) {
2374                                 /*
2375                                  * Return the path in hold state. Caller should
2376                                  * release the lock by calling mdi_rele_path()
2377                                  */
2378                                 MDI_PI_HOLD(pip);
2379                                 MDI_PI_UNLOCK(pip);
2380                                 if (sb)
2381                                         ct->ct_path_last = pip;
2382                                 *ret_pip = pip;
2383                                 MDI_CLIENT_UNLOCK(ct);
2384                                 return (MDI_SUCCESS);
2385                         }
2386                         /*
2387                          * Path is busy.
2388                          */
2389                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2390                             MDI_PI_IS_TRANSIENT(pip))
2391                                 retry = 1;
2392 
2393                         /*
2394                          * Keep looking for a next available online path
2395                          */
2396 do_again:
2397                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2398                         if (next == NULL) {
2399                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2400                                         /*
2401                                          * Bail out since we hit the end of list
2402                                          */
2403                                         MDI_PI_UNLOCK(pip);
2404                                         break;
2405                                 }
2406 
2407                                 if (!sb) {
2408                                         if (preferred == 1) {
2409                                                 /*
2410                                                  * Looks like we reached the
2411                                                  * end of the list. Lets enable
2412                                                  * traversal of non preferred
2413                                                  * paths.
2414                                                  */
2415                                                 preferred = 0;
2416                                                 next = head;
2417                                         } else {
2418                                                 /*
2419                                                  * We have done both the passes
2420                                                  * Preferred as well as for
2421                                                  * Non-preferred. Bail out now.
2422                                                  */
2423                                                 cont = 0;
2424                                         }
2425                                 } else {
2426                                         /*
2427                                          * Standard behavior case.
2428                                          */
2429                                         next = head;
2430                                 }
2431                         }
2432                         MDI_PI_UNLOCK(pip);
2433                         if (cont == 0) {
2434                                 break;
2435                         }
2436                         pip = next;
2437 
2438                         if (!sb) {
2439                                 /*
2440                                  * We need to handle the selection of
2441                                  * non-preferred path in the following
2442                                  * case:
2443                                  *
2444                                  * +------+   +------+   +------+   +-----+
2445                                  * | A : 1| - | B : 1| - | C : 0| - |NULL |
2446                                  * +------+   +------+   +------+   +-----+
2447                                  *
2448                                  * If we start the search with B, we need to
2449                                  * skip beyond B to pick C which is non -
2450                                  * preferred in the second pass. The following
2451                                  * test, if true, will allow us to skip over
2452                                  * the 'start'(B in the example) to select
2453                                  * other non preferred elements.
2454                                  */
2455                                 if ((start_pip != NULL) && (start_pip == pip) &&
2456                                     (MDI_PI(start_pip)->pi_preferred
2457                                     != preferred)) {
2458                                         /*
2459                                          * try again after going past the start
2460                                          * pip
2461                                          */
2462                                         MDI_PI_LOCK(pip);
2463                                         goto do_again;
2464                                 }
2465                         } else {
2466                                 /*
2467                                  * Standard behavior case
2468                                  */
2469                                 if (start == pip && preferred) {
2470                                         /* look for nonpreferred paths */
2471                                         preferred = 0;
2472                                 } else if (start == pip && !preferred) {
2473                                         /*
2474                                          * Exit condition
2475                                          */
2476                                         cont = 0;
2477                                 }
2478                         }
2479                 } while (cont);
2480                 break;
2481         }
2482 
2483         MDI_CLIENT_UNLOCK(ct);
2484         if (retry == 1) {
2485                 return (MDI_BUSY);
2486         } else {
2487                 return (MDI_NOPATH);
2488         }
2489 }
2490 
2491 /*
2492  * For a client, return the next available path to any phci
2493  *
2494  * Note:
2495  *              Caller should hold the branch's devinfo node to get a consistent
2496  *              snap shot of the mdi_pathinfo nodes.
2497  *
2498  *              Please note that even the list is stable the mdi_pathinfo
2499  *              node state and properties are volatile.  The caller should lock
2500  *              and unlock the nodes by calling mdi_pi_lock() and
2501  *              mdi_pi_unlock() functions to get a stable properties.
2502  *
2503  *              If there is a need to use the nodes beyond the hold of the
2504  *              devinfo node period (For ex. I/O), then mdi_pathinfo node
2505  *              need to be held against unexpected removal by calling
2506  *              mdi_hold_path() and should be released by calling
2507  *              mdi_rele_path() on completion.
2508  */
2509 mdi_pathinfo_t *
2510 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2511 {
2512         mdi_client_t *ct;
2513 
2514         if (!MDI_CLIENT(ct_dip))
2515                 return (NULL);
2516 
2517         /*
2518          * Walk through client link
2519          */
2520         ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2521         ASSERT(ct != NULL);
2522 
2523         if (pip == NULL)
2524                 return ((mdi_pathinfo_t *)ct->ct_path_head);
2525 
2526         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2527 }
2528 
2529 /*
2530  * For a phci, return the next available path to any client
2531  * Note: ditto mdi_get_next_phci_path()
2532  */
2533 mdi_pathinfo_t *
2534 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2535 {
2536         mdi_phci_t *ph;
2537 
2538         if (!MDI_PHCI(ph_dip))
2539                 return (NULL);
2540 
2541         /*
2542          * Walk through pHCI link
2543          */
2544         ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2545         ASSERT(ph != NULL);
2546 
2547         if (pip == NULL)
2548                 return ((mdi_pathinfo_t *)ph->ph_path_head);
2549 
2550         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2551 }
2552 
2553 /*
2554  * mdi_hold_path():
2555  *              Hold the mdi_pathinfo node against unwanted unexpected free.
2556  * Return Values:
2557  *              None
2558  */
2559 void
2560 mdi_hold_path(mdi_pathinfo_t *pip)
2561 {
2562         if (pip) {
2563                 MDI_PI_LOCK(pip);
2564                 MDI_PI_HOLD(pip);
2565                 MDI_PI_UNLOCK(pip);
2566         }
2567 }
2568 
2569 
2570 /*
2571  * mdi_rele_path():
2572  *              Release the mdi_pathinfo node which was selected
2573  *              through mdi_select_path() mechanism or manually held by
2574  *              calling mdi_hold_path().
2575  * Return Values:
2576  *              None
2577  */
2578 void
2579 mdi_rele_path(mdi_pathinfo_t *pip)
2580 {
2581         if (pip) {
2582                 MDI_PI_LOCK(pip);
2583                 MDI_PI_RELE(pip);
2584                 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2585                         cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2586                 }
2587                 MDI_PI_UNLOCK(pip);
2588         }
2589 }
2590 
2591 /*
2592  * mdi_pi_lock():
2593  *              Lock the mdi_pathinfo node.
2594  * Note:
2595  *              The caller should release the lock by calling mdi_pi_unlock()
2596  */
2597 void
2598 mdi_pi_lock(mdi_pathinfo_t *pip)
2599 {
2600         ASSERT(pip != NULL);
2601         if (pip) {
2602                 MDI_PI_LOCK(pip);
2603         }
2604 }
2605 
2606 
2607 /*
2608  * mdi_pi_unlock():
2609  *              Unlock the mdi_pathinfo node.
2610  * Note:
2611  *              The mdi_pathinfo node should have been locked with mdi_pi_lock()
2612  */
2613 void
2614 mdi_pi_unlock(mdi_pathinfo_t *pip)
2615 {
2616         ASSERT(pip != NULL);
2617         if (pip) {
2618                 MDI_PI_UNLOCK(pip);
2619         }
2620 }
2621 
2622 /*
2623  * mdi_pi_find():
2624  *              Search the list of mdi_pathinfo nodes attached to the
2625  *              pHCI/Client device node whose path address matches "paddr".
2626  *              Returns a pointer to the mdi_pathinfo node if a matching node is
2627  *              found.
2628  * Return Values:
2629  *              mdi_pathinfo node handle
2630  *              NULL
2631  * Notes:
2632  *              Caller need not hold any locks to call this function.
2633  */
2634 mdi_pathinfo_t *
2635 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2636 {
2637         mdi_phci_t              *ph;
2638         mdi_vhci_t              *vh;
2639         mdi_client_t            *ct;
2640         mdi_pathinfo_t          *pip = NULL;
2641 
2642         MDI_DEBUG(2, (MDI_NOTE, pdip,
2643             "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2644         if ((pdip == NULL) || (paddr == NULL)) {
2645                 return (NULL);
2646         }
2647         ph = i_devi_get_phci(pdip);
2648         if (ph == NULL) {
2649                 /*
2650                  * Invalid pHCI device, Nothing more to do.
2651                  */
2652                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2653                 return (NULL);
2654         }
2655 
2656         vh = ph->ph_vhci;
2657         if (vh == NULL) {
2658                 /*
2659                  * Invalid vHCI device, Nothing more to do.
2660                  */
2661                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2662                 return (NULL);
2663         }
2664 
2665         /*
2666          * Look for pathinfo node identified by paddr.
2667          */
2668         if (caddr == NULL) {
2669                 /*
2670                  * Find a mdi_pathinfo node under pHCI list for a matching
2671                  * unit address.
2672                  */
2673                 MDI_PHCI_LOCK(ph);
2674                 if (MDI_PHCI_IS_OFFLINE(ph)) {
2675                         MDI_DEBUG(2, (MDI_WARN, pdip,
2676                             "offline phci %p", (void *)ph));
2677                         MDI_PHCI_UNLOCK(ph);
2678                         return (NULL);
2679                 }
2680                 pip = (mdi_pathinfo_t *)ph->ph_path_head;
2681 
2682                 while (pip != NULL) {
2683                         if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2684                                 break;
2685                         }
2686                         pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2687                 }
2688                 MDI_PHCI_UNLOCK(ph);
2689                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2690                     "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2691                 return (pip);
2692         }
2693 
2694         /*
2695          * XXX - Is the rest of the code in this function really necessary?
2696          * The consumers of mdi_pi_find() can search for the desired pathinfo
2697          * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2698          * whether the search is based on the pathinfo nodes attached to
2699          * the pHCI or the client node, the result will be the same.
2700          */
2701 
2702         /*
2703          * Find the client device corresponding to 'caddr'
2704          */
2705         MDI_VHCI_CLIENT_LOCK(vh);
2706 
2707         /*
2708          * XXX - Passing NULL to the following function works as long as the
2709          * the client addresses (caddr) are unique per vhci basis.
2710          */
2711         ct = i_mdi_client_find(vh, NULL, caddr);
2712         if (ct == NULL) {
2713                 /*
2714                  * Client not found, Obviously mdi_pathinfo node has not been
2715                  * created yet.
2716                  */
2717                 MDI_VHCI_CLIENT_UNLOCK(vh);
2718                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2719                     "client not found for caddr @%s", caddr ? caddr : ""));
2720                 return (NULL);
2721         }
2722 
2723         /*
2724          * Hold the client lock and look for a mdi_pathinfo node with matching
2725          * pHCI and paddr
2726          */
2727         MDI_CLIENT_LOCK(ct);
2728 
2729         /*
2730          * Release the global mutex as it is no more needed. Note: We always
2731          * respect the locking order while acquiring.
2732          */
2733         MDI_VHCI_CLIENT_UNLOCK(vh);
2734 
2735         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2736         while (pip != NULL) {
2737                 /*
2738                  * Compare the unit address
2739                  */
2740                 if ((MDI_PI(pip)->pi_phci == ph) &&
2741                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2742                         break;
2743                 }
2744                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2745         }
2746         MDI_CLIENT_UNLOCK(ct);
2747         MDI_DEBUG(2, (MDI_NOTE, pdip,
2748             "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2749         return (pip);
2750 }
2751 
2752 /*
2753  * mdi_pi_alloc():
2754  *              Allocate and initialize a new instance of a mdi_pathinfo node.
2755  *              The mdi_pathinfo node returned by this function identifies a
2756  *              unique device path is capable of having properties attached
2757  *              and passed to mdi_pi_online() to fully attach and online the
2758  *              path and client device node.
2759  *              The mdi_pathinfo node returned by this function must be
2760  *              destroyed using mdi_pi_free() if the path is no longer
2761  *              operational or if the caller fails to attach a client device
2762  *              node when calling mdi_pi_online(). The framework will not free
2763  *              the resources allocated.
2764  *              This function can be called from both interrupt and kernel
2765  *              contexts.  DDI_NOSLEEP flag should be used while calling
2766  *              from interrupt contexts.
2767  * Return Values:
2768  *              MDI_SUCCESS
2769  *              MDI_FAILURE
2770  *              MDI_NOMEM
2771  */
2772 /*ARGSUSED*/
2773 int
2774 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2775     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2776 {
2777         mdi_vhci_t      *vh;
2778         mdi_phci_t      *ph;
2779         mdi_client_t    *ct;
2780         mdi_pathinfo_t  *pip = NULL;
2781         dev_info_t      *cdip;
2782         int             rv = MDI_NOMEM;
2783         int             path_allocated = 0;
2784 
2785         MDI_DEBUG(2, (MDI_NOTE, pdip,
2786             "cname %s: caddr@%s paddr@%s",
2787             cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2788 
2789         if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2790             ret_pip == NULL) {
2791                 /* Nothing more to do */
2792                 return (MDI_FAILURE);
2793         }
2794 
2795         *ret_pip = NULL;
2796 
2797         /* No allocations on detaching pHCI */
2798         if (DEVI_IS_DETACHING(pdip)) {
2799                 /* Invalid pHCI device, return failure */
2800                 MDI_DEBUG(1, (MDI_WARN, pdip,
2801                     "!detaching pHCI=%p", (void *)pdip));
2802                 return (MDI_FAILURE);
2803         }
2804 
2805         ph = i_devi_get_phci(pdip);
2806         ASSERT(ph != NULL);
2807         if (ph == NULL) {
2808                 /* Invalid pHCI device, return failure */
2809                 MDI_DEBUG(1, (MDI_WARN, pdip,
2810                     "!invalid pHCI=%p", (void *)pdip));
2811                 return (MDI_FAILURE);
2812         }
2813 
2814         MDI_PHCI_LOCK(ph);
2815         vh = ph->ph_vhci;
2816         if (vh == NULL) {
2817                 /* Invalid vHCI device, return failure */
2818                 MDI_DEBUG(1, (MDI_WARN, pdip,
2819                     "!invalid vHCI=%p", (void *)pdip));
2820                 MDI_PHCI_UNLOCK(ph);
2821                 return (MDI_FAILURE);
2822         }
2823 
2824         if (MDI_PHCI_IS_READY(ph) == 0) {
2825                 /*
2826                  * Do not allow new node creation when pHCI is in
2827                  * offline/suspended states
2828                  */
2829                 MDI_DEBUG(1, (MDI_WARN, pdip,
2830                     "pHCI=%p is not ready", (void *)ph));
2831                 MDI_PHCI_UNLOCK(ph);
2832                 return (MDI_BUSY);
2833         }
2834         MDI_PHCI_UNSTABLE(ph);
2835         MDI_PHCI_UNLOCK(ph);
2836 
2837         /* look for a matching client, create one if not found */
2838         MDI_VHCI_CLIENT_LOCK(vh);
2839         ct = i_mdi_client_find(vh, cname, caddr);
2840         if (ct == NULL) {
2841                 ct = i_mdi_client_alloc(vh, cname, caddr);
2842                 ASSERT(ct != NULL);
2843         }
2844 
2845         if (ct->ct_dip == NULL) {
2846                 /*
2847                  * Allocate a devinfo node
2848                  */
2849                 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2850                     compatible, ncompatible);
2851                 if (ct->ct_dip == NULL) {
2852                         (void) i_mdi_client_free(vh, ct);
2853                         goto fail;
2854                 }
2855         }
2856         cdip = ct->ct_dip;
2857 
2858         DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2859         DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2860 
2861         MDI_CLIENT_LOCK(ct);
2862         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2863         while (pip != NULL) {
2864                 /*
2865                  * Compare the unit address
2866                  */
2867                 if ((MDI_PI(pip)->pi_phci == ph) &&
2868                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2869                         break;
2870                 }
2871                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2872         }
2873         MDI_CLIENT_UNLOCK(ct);
2874 
2875         if (pip == NULL) {
2876                 /*
2877                  * This is a new path for this client device.  Allocate and
2878                  * initialize a new pathinfo node
2879                  */
2880                 pip = i_mdi_pi_alloc(ph, paddr, ct);
2881                 ASSERT(pip != NULL);
2882                 path_allocated = 1;
2883         }
2884         rv = MDI_SUCCESS;
2885 
2886 fail:
2887         /*
2888          * Release the global mutex.
2889          */
2890         MDI_VHCI_CLIENT_UNLOCK(vh);
2891 
2892         /*
2893          * Mark the pHCI as stable
2894          */
2895         MDI_PHCI_LOCK(ph);
2896         MDI_PHCI_STABLE(ph);
2897         MDI_PHCI_UNLOCK(ph);
2898         *ret_pip = pip;
2899 
2900         MDI_DEBUG(2, (MDI_NOTE, pdip,
2901             "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2902 
2903         if (path_allocated)
2904                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2905 
2906         return (rv);
2907 }
2908 
2909 /*ARGSUSED*/
2910 int
2911 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2912     int flags, mdi_pathinfo_t **ret_pip)
2913 {
2914         return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2915             flags, ret_pip));
2916 }
2917 
2918 /*
2919  * i_mdi_pi_alloc():
2920  *              Allocate a mdi_pathinfo node and add to the pHCI path list
2921  * Return Values:
2922  *              mdi_pathinfo
2923  */
2924 /*ARGSUSED*/
2925 static mdi_pathinfo_t *
2926 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2927 {
2928         mdi_pathinfo_t  *pip;
2929         int             ct_circular;
2930         int             ph_circular;
2931         static char     path[MAXPATHLEN];       /* mdi_pathmap_mutex protects */
2932         char            *path_persistent;
2933         int             path_instance;
2934         mod_hash_val_t  hv;
2935 
2936         ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2937 
2938         pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2939         mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2940         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2941             MDI_PATHINFO_STATE_TRANSIENT;
2942 
2943         if (MDI_PHCI_IS_USER_DISABLED(ph))
2944                 MDI_PI_SET_USER_DISABLE(pip);
2945 
2946         if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2947                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2948 
2949         if (MDI_PHCI_IS_DRV_DISABLED(ph))
2950                 MDI_PI_SET_DRV_DISABLE(pip);
2951 
2952         MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2953         cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2954         MDI_PI(pip)->pi_client = ct;
2955         MDI_PI(pip)->pi_phci = ph;
2956         MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2957         (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2958 
2959         /*
2960          * We form the "path" to the pathinfo node, and see if we have
2961          * already allocated a 'path_instance' for that "path".  If so,
2962          * we use the already allocated 'path_instance'.  If not, we
2963          * allocate a new 'path_instance' and associate it with a copy of
2964          * the "path" string (which is never freed). The association
2965          * between a 'path_instance' this "path" string persists until
2966          * reboot.
2967          */
2968         mutex_enter(&mdi_pathmap_mutex);
2969         (void) ddi_pathname(ph->ph_dip, path);
2970         (void) sprintf(path + strlen(path), "/%s@%s",
2971             mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2972         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2973                 path_instance = (uint_t)(intptr_t)hv;
2974         } else {
2975                 /* allocate a new 'path_instance' and persistent "path" */
2976                 path_instance = mdi_pathmap_instance++;
2977                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2978                 (void) mod_hash_insert(mdi_pathmap_bypath,
2979                     (mod_hash_key_t)path_persistent,
2980                     (mod_hash_val_t)(intptr_t)path_instance);
2981                 (void) mod_hash_insert(mdi_pathmap_byinstance,
2982                     (mod_hash_key_t)(intptr_t)path_instance,
2983                     (mod_hash_val_t)path_persistent);
2984 
2985                 /* create shortpath name */
2986                 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2987                     ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2988                     mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2989                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2990                 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2991                     (mod_hash_key_t)(intptr_t)path_instance,
2992                     (mod_hash_val_t)path_persistent);
2993         }
2994         mutex_exit(&mdi_pathmap_mutex);
2995         MDI_PI(pip)->pi_path_instance = path_instance;
2996 
2997         (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2998         ASSERT(MDI_PI(pip)->pi_prop != NULL);
2999         MDI_PI(pip)->pi_pprivate = NULL;
3000         MDI_PI(pip)->pi_cprivate = NULL;
3001         MDI_PI(pip)->pi_vprivate = NULL;
3002         MDI_PI(pip)->pi_client_link = NULL;
3003         MDI_PI(pip)->pi_phci_link = NULL;
3004         MDI_PI(pip)->pi_ref_cnt = 0;
3005         MDI_PI(pip)->pi_kstats = NULL;
3006         MDI_PI(pip)->pi_preferred = 1;
3007         cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3008 
3009         /*
3010          * Lock both dev_info nodes against changes in parallel.
3011          *
3012          * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3013          * This atypical operation is done to synchronize pathinfo nodes
3014          * during devinfo snapshot (see di_register_pip) by 'pretending' that
3015          * the pathinfo nodes are children of the Client.
3016          */
3017         ndi_devi_enter(ct->ct_dip, &ct_circular);
3018         ndi_devi_enter(ph->ph_dip, &ph_circular);
3019 
3020         i_mdi_phci_add_path(ph, pip);
3021         i_mdi_client_add_path(ct, pip);
3022 
3023         ndi_devi_exit(ph->ph_dip, ph_circular);
3024         ndi_devi_exit(ct->ct_dip, ct_circular);
3025 
3026         return (pip);
3027 }
3028 
3029 /*
3030  * mdi_pi_pathname_by_instance():
3031  *      Lookup of "path" by 'path_instance'. Return "path".
3032  *      NOTE: returned "path" remains valid forever (until reboot).
3033  */
3034 char *
3035 mdi_pi_pathname_by_instance(int path_instance)
3036 {
3037         char            *path;
3038         mod_hash_val_t  hv;
3039 
3040         /* mdi_pathmap lookup of "path" by 'path_instance' */
3041         mutex_enter(&mdi_pathmap_mutex);
3042         if (mod_hash_find(mdi_pathmap_byinstance,
3043             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3044                 path = (char *)hv;
3045         else
3046                 path = NULL;
3047         mutex_exit(&mdi_pathmap_mutex);
3048         return (path);
3049 }
3050 
3051 /*
3052  * mdi_pi_spathname_by_instance():
3053  *      Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3054  *      NOTE: returned "shortpath" remains valid forever (until reboot).
3055  */
3056 char *
3057 mdi_pi_spathname_by_instance(int path_instance)
3058 {
3059         char            *path;
3060         mod_hash_val_t  hv;
3061 
3062         /* mdi_pathmap lookup of "path" by 'path_instance' */
3063         mutex_enter(&mdi_pathmap_mutex);
3064         if (mod_hash_find(mdi_pathmap_sbyinstance,
3065             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3066                 path = (char *)hv;
3067         else
3068                 path = NULL;
3069         mutex_exit(&mdi_pathmap_mutex);
3070         return (path);
3071 }
3072 
3073 
3074 /*
3075  * i_mdi_phci_add_path():
3076  *              Add a mdi_pathinfo node to pHCI list.
3077  * Notes:
3078  *              Caller should per-pHCI mutex
3079  */
3080 static void
3081 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3082 {
3083         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3084 
3085         MDI_PHCI_LOCK(ph);
3086         if (ph->ph_path_head == NULL) {
3087                 ph->ph_path_head = pip;
3088         } else {
3089                 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3090         }
3091         ph->ph_path_tail = pip;
3092         ph->ph_path_count++;
3093         MDI_PHCI_UNLOCK(ph);
3094 }
3095 
3096 /*
3097  * i_mdi_client_add_path():
3098  *              Add mdi_pathinfo node to client list
3099  */
3100 static void
3101 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3102 {
3103         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3104 
3105         MDI_CLIENT_LOCK(ct);
3106         if (ct->ct_path_head == NULL) {
3107                 ct->ct_path_head = pip;
3108         } else {
3109                 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3110         }
3111         ct->ct_path_tail = pip;
3112         ct->ct_path_count++;
3113         MDI_CLIENT_UNLOCK(ct);
3114 }
3115 
3116 /*
3117  * mdi_pi_free():
3118  *              Free the mdi_pathinfo node and also client device node if this
3119  *              is the last path to the device
3120  * Return Values:
3121  *              MDI_SUCCESS
3122  *              MDI_FAILURE
3123  *              MDI_BUSY
3124  */
3125 /*ARGSUSED*/
3126 int
3127 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3128 {
3129         int             rv;
3130         mdi_vhci_t      *vh;
3131         mdi_phci_t      *ph;
3132         mdi_client_t    *ct;
3133         int             (*f)();
3134         int             client_held = 0;
3135 
3136         MDI_PI_LOCK(pip);
3137         ph = MDI_PI(pip)->pi_phci;
3138         ASSERT(ph != NULL);
3139         if (ph == NULL) {
3140                 /*
3141                  * Invalid pHCI device, return failure
3142                  */
3143                 MDI_DEBUG(1, (MDI_WARN, NULL,
3144                     "!invalid pHCI: pip %s %p",
3145                     mdi_pi_spathname(pip), (void *)pip));
3146                 MDI_PI_UNLOCK(pip);
3147                 return (MDI_FAILURE);
3148         }
3149 
3150         vh = ph->ph_vhci;
3151         ASSERT(vh != NULL);
3152         if (vh == NULL) {
3153                 /* Invalid pHCI device, return failure */
3154                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3155                     "!invalid vHCI: pip %s %p",
3156                     mdi_pi_spathname(pip), (void *)pip));
3157                 MDI_PI_UNLOCK(pip);
3158                 return (MDI_FAILURE);
3159         }
3160 
3161         ct = MDI_PI(pip)->pi_client;
3162         ASSERT(ct != NULL);
3163         if (ct == NULL) {
3164                 /*
3165                  * Invalid Client device, return failure
3166                  */
3167                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3168                     "!invalid client: pip %s %p",
3169                     mdi_pi_spathname(pip), (void *)pip));
3170                 MDI_PI_UNLOCK(pip);
3171                 return (MDI_FAILURE);
3172         }
3173 
3174         /*
3175          * Check to see for busy condition.  A mdi_pathinfo can only be freed
3176          * if the node state is either offline or init and the reference count
3177          * is zero.
3178          */
3179         if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3180             MDI_PI_IS_INITING(pip))) {
3181                 /*
3182                  * Node is busy
3183                  */
3184                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3185                     "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3186                 MDI_PI_UNLOCK(pip);
3187                 return (MDI_BUSY);
3188         }
3189 
3190         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3191                 /*
3192                  * Give a chance for pending I/Os to complete.
3193                  */
3194                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3195                     "!%d cmds still pending on path: %s %p",
3196                     MDI_PI(pip)->pi_ref_cnt,
3197                     mdi_pi_spathname(pip), (void *)pip));
3198                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3199                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3200                     TR_CLOCK_TICK) == -1) {
3201                         /*
3202                          * The timeout time reached without ref_cnt being zero
3203                          * being signaled.
3204                          */
3205                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3206                             "!Timeout reached on path %s %p without the cond",
3207                             mdi_pi_spathname(pip), (void *)pip));
3208                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3209                             "!%d cmds still pending on path %s %p",
3210                             MDI_PI(pip)->pi_ref_cnt,
3211                             mdi_pi_spathname(pip), (void *)pip));
3212                         MDI_PI_UNLOCK(pip);
3213                         return (MDI_BUSY);
3214                 }
3215         }
3216         if (MDI_PI(pip)->pi_pm_held) {
3217                 client_held = 1;
3218         }
3219         MDI_PI_UNLOCK(pip);
3220 
3221         vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3222 
3223         MDI_CLIENT_LOCK(ct);
3224 
3225         /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3226         MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3227 
3228         /*
3229          * Wait till failover is complete before removing this node.
3230          */
3231         while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3232                 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3233 
3234         MDI_CLIENT_UNLOCK(ct);
3235         MDI_VHCI_CLIENT_LOCK(vh);
3236         MDI_CLIENT_LOCK(ct);
3237         MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3238 
3239         if (!MDI_PI_IS_INITING(pip)) {
3240                 f = vh->vh_ops->vo_pi_uninit;
3241                 if (f != NULL) {
3242                         rv = (*f)(vh->vh_dip, pip, 0);
3243                 }
3244         } else
3245                 rv = MDI_SUCCESS;
3246 
3247         /*
3248          * If vo_pi_uninit() completed successfully.
3249          */
3250         if (rv == MDI_SUCCESS) {
3251                 if (client_held) {
3252                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3253                             "i_mdi_pm_rele_client\n"));
3254                         i_mdi_pm_rele_client(ct, 1);
3255                 }
3256                 i_mdi_pi_free(ph, pip, ct);
3257                 if (ct->ct_path_count == 0) {
3258                         /*
3259                          * Client lost its last path.
3260                          * Clean up the client device
3261                          */
3262                         ct->ct_flags |= flags;
3263                         MDI_CLIENT_UNLOCK(ct);
3264                         (void) i_mdi_client_free(ct->ct_vhci, ct);
3265                         MDI_VHCI_CLIENT_UNLOCK(vh);
3266                         return (rv);
3267                 }
3268         }
3269         MDI_CLIENT_UNLOCK(ct);
3270         MDI_VHCI_CLIENT_UNLOCK(vh);
3271 
3272         if (rv == MDI_FAILURE)
3273                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3274 
3275         return (rv);
3276 }
3277 
3278 /*
3279  * i_mdi_pi_free():
3280  *              Free the mdi_pathinfo node
3281  */
3282 static void
3283 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3284 {
3285         int     ct_circular;
3286         int     ph_circular;
3287 
3288         ASSERT(MDI_CLIENT_LOCKED(ct));
3289 
3290         /*
3291          * remove any per-path kstats
3292          */
3293         i_mdi_pi_kstat_destroy(pip);
3294 
3295         /* See comments in i_mdi_pi_alloc() */
3296         ndi_devi_enter(ct->ct_dip, &ct_circular);
3297         ndi_devi_enter(ph->ph_dip, &ph_circular);
3298 
3299         i_mdi_client_remove_path(ct, pip);
3300         i_mdi_phci_remove_path(ph, pip);
3301 
3302         ndi_devi_exit(ph->ph_dip, ph_circular);
3303         ndi_devi_exit(ct->ct_dip, ct_circular);
3304 
3305         mutex_destroy(&MDI_PI(pip)->pi_mutex);
3306         cv_destroy(&MDI_PI(pip)->pi_state_cv);
3307         cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3308         if (MDI_PI(pip)->pi_addr) {
3309                 kmem_free(MDI_PI(pip)->pi_addr,
3310                     strlen(MDI_PI(pip)->pi_addr) + 1);
3311                 MDI_PI(pip)->pi_addr = NULL;
3312         }
3313 
3314         if (MDI_PI(pip)->pi_prop) {
3315                 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3316                 MDI_PI(pip)->pi_prop = NULL;
3317         }
3318         kmem_free(pip, sizeof (struct mdi_pathinfo));
3319 }
3320 
3321 
3322 /*
3323  * i_mdi_phci_remove_path():
3324  *              Remove a mdi_pathinfo node from pHCI list.
3325  * Notes:
3326  *              Caller should hold per-pHCI mutex
3327  */
3328 static void
3329 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3330 {
3331         mdi_pathinfo_t  *prev = NULL;
3332         mdi_pathinfo_t  *path = NULL;
3333 
3334         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3335 
3336         MDI_PHCI_LOCK(ph);
3337         path = ph->ph_path_head;
3338         while (path != NULL) {
3339                 if (path == pip) {
3340                         break;
3341                 }
3342                 prev = path;
3343                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3344         }
3345 
3346         if (path) {
3347                 ph->ph_path_count--;
3348                 if (prev) {
3349                         MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3350                 } else {
3351                         ph->ph_path_head =
3352                             (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3353                 }
3354                 if (ph->ph_path_tail == path) {
3355                         ph->ph_path_tail = prev;
3356                 }
3357         }
3358 
3359         /*
3360          * Clear the pHCI link
3361          */
3362         MDI_PI(pip)->pi_phci_link = NULL;
3363         MDI_PI(pip)->pi_phci = NULL;
3364         MDI_PHCI_UNLOCK(ph);
3365 }
3366 
3367 /*
3368  * i_mdi_client_remove_path():
3369  *              Remove a mdi_pathinfo node from client path list.
3370  */
3371 static void
3372 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3373 {
3374         mdi_pathinfo_t  *prev = NULL;
3375         mdi_pathinfo_t  *path;
3376 
3377         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3378 
3379         ASSERT(MDI_CLIENT_LOCKED(ct));
3380         path = ct->ct_path_head;
3381         while (path != NULL) {
3382                 if (path == pip) {
3383                         break;
3384                 }
3385                 prev = path;
3386                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3387         }
3388 
3389         if (path) {
3390                 ct->ct_path_count--;
3391                 if (prev) {
3392                         MDI_PI(prev)->pi_client_link =
3393                             MDI_PI(path)->pi_client_link;
3394                 } else {
3395                         ct->ct_path_head =
3396                             (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3397                 }
3398                 if (ct->ct_path_tail == path) {
3399                         ct->ct_path_tail = prev;
3400                 }
3401                 if (ct->ct_path_last == path) {
3402                         ct->ct_path_last = ct->ct_path_head;
3403                 }
3404         }
3405         MDI_PI(pip)->pi_client_link = NULL;
3406         MDI_PI(pip)->pi_client = NULL;
3407 }
3408 
/*
 * i_mdi_pi_state_change():
 *              Common worker used to move a mdi_pathinfo node to the
 *              requested state (online, standby, fault or offline).
 *
 *              The node is first marked with the matching transient state
 *              (ONLINING/STANDBYING/FAULTING/OFFLINING), then the vHCI
 *              driver's vo_pi_state_change() entry point is called with no
 *              MDI locks held.  On success the transient state is cleared;
 *              on failure pi_state is restored from MDI_PI_OLD_STATE().
 *              When the callback succeeds and the client is no longer
 *              unstable, the client state is recomputed and the client
 *              devinfo node is onlined or offlined to match.
 *
 *              pip   - path to change
 *              state - target MDI_PATHINFO_STATE_* value
 *              flag  - NDI flags; NDI_USER_REQ is examined here and the
 *                      whole value is passed to vo_pi_state_change()
 *
 * Return Values:
 *              MDI_SUCCESS
 *              MDI_BUSY        pHCI not ready, or ndi_devi_offline() of the
 *                              client returned NDI_BUSY
 *              MDI_FAILURE     invalid pHCI/vHCI/client, vo_pi_init()
 *                              failure, or other NDI failure
 *              Any other non-success value returned by
 *              vo_pi_state_change() (e.g. MDI_NOT_SUPPORTED) is passed
 *              back unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
        int             rv = MDI_SUCCESS;
        mdi_vhci_t      *vh;
        mdi_phci_t      *ph;
        mdi_client_t    *ct;
        int             (*f)();
        dev_info_t      *cdip;

        MDI_PI_LOCK(pip);

        ph = MDI_PI(pip)->pi_phci;
        ASSERT(ph);
        if (ph == NULL) {
                /*
                 * Invalid pHCI device, fail the request
                 */
                MDI_PI_UNLOCK(pip);
                MDI_DEBUG(1, (MDI_WARN, NULL,
                    "!invalid phci: pip %s %p",
                    mdi_pi_spathname(pip), (void *)pip));
                return (MDI_FAILURE);
        }

        vh = ph->ph_vhci;
        ASSERT(vh);
        if (vh == NULL) {
                /*
                 * Invalid vHCI device, fail the request
                 */
                MDI_PI_UNLOCK(pip);
                MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
                    "!invalid vhci: pip %s %p",
                    mdi_pi_spathname(pip), (void *)pip));
                return (MDI_FAILURE);
        }

        ct = MDI_PI(pip)->pi_client;
        ASSERT(ct != NULL);
        if (ct == NULL) {
                /*
                 * Invalid client device, fail the request
                 */
                MDI_PI_UNLOCK(pip);
                MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
                    "!invalid client: pip %s %p",
                    mdi_pi_spathname(pip), (void *)pip));
                return (MDI_FAILURE);
        }

        /*
         * If this path has not been initialized yet, Callback vHCI driver's
         * pathinfo node initialize entry point
         */

        if (MDI_PI_IS_INITING(pip)) {
                /* The vHCI callback is made without the pip lock held. */
                MDI_PI_UNLOCK(pip);
                f = vh->vh_ops->vo_pi_init;
                if (f != NULL) {
                        rv = (*f)(vh->vh_dip, pip, 0);
                        if (rv != MDI_SUCCESS) {
                                MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
                                    "!vo_pi_init failed: vHCI %p, pip %s %p",
                                    (void *)vh, mdi_pi_spathname(pip),
                                    (void *)pip));
                                /* pip lock was already dropped above */
                                return (MDI_FAILURE);
                        }
                }
                MDI_PI_LOCK(pip);
                MDI_PI_CLEAR_TRANSIENT(pip);
        }

        /*
         * Do not allow state transition when pHCI is in offline/suspended
         * states
         */
        i_mdi_phci_lock(ph, pip);
        if (MDI_PHCI_IS_READY(ph) == 0) {
                MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
                    "!pHCI not ready, pHCI=%p", (void *)ph));
                MDI_PI_UNLOCK(pip);
                i_mdi_phci_unlock(ph);
                return (MDI_BUSY);
        }
        /*
         * From here on every exit path goes through state_change_exit,
         * which marks the pHCI stable again.
         */
        MDI_PHCI_UNSTABLE(ph);
        i_mdi_phci_unlock(ph);

        /*
         * Check if mdi_pathinfo state is in transient state.
         * If yes, offlining is in progress and wait till transient state is
         * cleared.
         */
        if (MDI_PI_IS_TRANSIENT(pip)) {
                while (MDI_PI_IS_TRANSIENT(pip)) {
                        cv_wait(&MDI_PI(pip)->pi_state_cv,
                            &MDI_PI(pip)->pi_mutex);
                }
        }

        /*
         * Grab the client lock in reverse order sequence and release the
         * mdi_pathinfo mutex.
         */
        i_mdi_client_lock(ct, pip);
        MDI_PI_UNLOCK(pip);

        /*
         * Wait till failover state is cleared
         */
        while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
                cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

        /*
         * Mark the mdi_pathinfo node state as transient
         */
        MDI_PI_LOCK(pip);
        switch (state) {
        case MDI_PATHINFO_STATE_ONLINE:
                MDI_PI_SET_ONLINING(pip);
                break;

        case MDI_PATHINFO_STATE_STANDBY:
                MDI_PI_SET_STANDBYING(pip);
                break;

        case MDI_PATHINFO_STATE_FAULT:
                /*
                 * Mark the pathinfo state as FAULTED
                 */
                MDI_PI_SET_FAULTING(pip);
                MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
                break;

        case MDI_PATHINFO_STATE_OFFLINE:
                /*
                 * ndi_devi_offline() cannot hold pip or ct locks.
                 */
                MDI_PI_UNLOCK(pip);

                /*
                 * If this is a user initiated path online->offline operation
                 * who's success would transition a client from DEGRADED to
                 * FAILED then only proceed if we can offline the client first.
                 */
                cdip = ct->ct_dip;
                if ((flag & NDI_USER_REQ) &&
                    MDI_PI_IS_ONLINE(pip) &&
                    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
                        i_mdi_client_unlock(ct);
                        rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
                        if (rv != NDI_SUCCESS) {
                                /*
                                 * Convert to MDI error code
                                 */
                                switch (rv) {
                                case NDI_BUSY:
                                        rv = MDI_BUSY;
                                        break;
                                default:
                                        rv = MDI_FAILURE;
                                        break;
                                }
                                /*
                                 * Neither the pip nor the client lock is
                                 * held at this point.
                                 */
                                goto state_change_exit;
                        } else {
                                /* Re-take the client lock dropped above */
                                i_mdi_client_lock(ct, NULL);
                        }
                }
                /*
                 * Mark the mdi_pathinfo node state as transient
                 */
                MDI_PI_LOCK(pip);
                MDI_PI_SET_OFFLINING(pip);
                break;
        }
        MDI_PI_UNLOCK(pip);
        MDI_CLIENT_UNSTABLE(ct);
        i_mdi_client_unlock(ct);

        /*
         * Notify the vHCI driver of the state change with no pip or client
         * lock held.  If the vHCI has no vo_pi_state_change entry point,
         * rv keeps its current value.
         */
        f = vh->vh_ops->vo_pi_state_change;
        if (f != NULL)
                rv = (*f)(vh->vh_dip, pip, state, 0, flag);

        MDI_CLIENT_LOCK(ct);
        MDI_PI_LOCK(pip);
        if (rv == MDI_NOT_SUPPORTED) {
                MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
        }
        if (rv != MDI_SUCCESS) {
                MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
                    "vo_pi_state_change failed: rv %x", rv));
        }
        /*
         * Resolve the transient state: commit it on success, otherwise
         * put back the state saved in MDI_PI_OLD_STATE().
         */
        if (MDI_PI_IS_TRANSIENT(pip)) {
                if (rv == MDI_SUCCESS) {
                        MDI_PI_CLEAR_TRANSIENT(pip);
                } else {
                        MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
                }
        }

        /*
         * Wake anyone waiting for this mdi_pathinfo node
         */
        cv_broadcast(&MDI_PI(pip)->pi_state_cv);
        MDI_PI_UNLOCK(pip);

        /*
         * Mark the client device as stable
         */
        MDI_CLIENT_STABLE(ct);
        if (rv == MDI_SUCCESS) {
                /*
                 * Only recompute the client state once ct_unstable has
                 * dropped to zero.
                 */
                if (ct->ct_unstable == 0) {
                        cdip = ct->ct_dip;

                        /*
                         * Onlining the mdi_pathinfo node will impact the
                         * client state Update the client and dev_info node
                         * state accordingly
                         */
                        rv = NDI_SUCCESS;
                        i_mdi_client_update_state(ct);
                        switch (MDI_CLIENT_STATE(ct)) {
                        case MDI_CLIENT_STATE_OPTIMAL:
                        case MDI_CLIENT_STATE_DEGRADED:
                                if (cdip && !i_ddi_devi_attached(cdip) &&
                                    ((state == MDI_PATHINFO_STATE_ONLINE) ||
                                    (state == MDI_PATHINFO_STATE_STANDBY))) {

                                        /*
                                         * Must do ndi_devi_online() through
                                         * hotplug thread for deferred
                                         * attach mechanism to work
                                         */
                                        MDI_CLIENT_UNLOCK(ct);
                                        rv = ndi_devi_online(cdip, 0);
                                        MDI_CLIENT_LOCK(ct);
                                        if ((rv != NDI_SUCCESS) &&
                                            (MDI_CLIENT_STATE(ct) ==
                                            MDI_CLIENT_STATE_DEGRADED)) {
                                                MDI_DEBUG(1, (MDI_WARN, cdip,
                                                    "!ndi_devi_online failed "
                                                    "error %x", rv));
                                        }
                                        /*
                                         * A failure to online the client
                                         * node is only logged; it is not
                                         * reported to the caller.
                                         */
                                        rv = NDI_SUCCESS;
                                }
                                break;

                        case MDI_CLIENT_STATE_FAILED:
                                /*
                                 * This is the last path case for
                                 * non-user initiated events.
                                 */
                                if ((flag & NDI_USER_REQ) ||
                                    cdip == NULL || i_ddi_node_state(cdip) <
                                    DS_INITIALIZED)
                                        break;

                                /*
                                 * The client lock is dropped around
                                 * ndi_devi_offline().
                                 */
                                MDI_CLIENT_UNLOCK(ct);
                                rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN |
                                    NDI_DEVI_GONE);
                                MDI_CLIENT_LOCK(ct);

                                if (rv != NDI_SUCCESS) {
                                        /*
                                         * Reset client flags to online as the
                                         * path could not be offlined.
                                         */
                                        MDI_DEBUG(1, (MDI_WARN, cdip,
                                            "!ndi_devi_offline failed: %d",
                                            rv));
                                        MDI_CLIENT_SET_ONLINE(ct);
                                }
                                break;
                        }
                        /*
                         * Convert to MDI error code
                         */
                        switch (rv) {
                        case NDI_SUCCESS:
                                MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
                                i_mdi_report_path_state(ct, pip);
                                rv = MDI_SUCCESS;
                                break;
                        case NDI_BUSY:
                                rv = MDI_BUSY;
                                break;
                        default:
                                rv = MDI_FAILURE;
                                break;
                        }
                }
        }
        MDI_CLIENT_UNLOCK(ct);

state_change_exit:
        /*
         * Mark the pHCI as stable again.
         */
        MDI_PHCI_LOCK(ph);
        MDI_PHCI_STABLE(ph);
        MDI_PHCI_UNLOCK(ph);
        return (rv);
}
3721 
3722 /*
3723  * mdi_pi_online():
3724  *              Place the path_info node in the online state.  The path is
3725  *              now available to be selected by mdi_select_path() for
3726  *              transporting I/O requests to client devices.
3727  * Return Values:
3728  *              MDI_SUCCESS
3729  *              MDI_FAILURE
3730  */
3731 int
3732 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3733 {
3734         mdi_client_t    *ct = MDI_PI(pip)->pi_client;
3735         int             client_held = 0;
3736         int             rv;
3737 
3738         ASSERT(ct != NULL);
3739         rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3740         if (rv != MDI_SUCCESS)
3741                 return (rv);
3742 
3743         MDI_PI_LOCK(pip);
3744         if (MDI_PI(pip)->pi_pm_held == 0) {
3745                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3746                     "i_mdi_pm_hold_pip %p", (void *)pip));
3747                 i_mdi_pm_hold_pip(pip);
3748                 client_held = 1;
3749         }
3750         MDI_PI_UNLOCK(pip);
3751 
3752         if (client_held) {
3753                 MDI_CLIENT_LOCK(ct);
3754                 if (ct->ct_power_cnt == 0) {
3755                         rv = i_mdi_power_all_phci(ct);
3756                 }
3757 
3758                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3759                     "i_mdi_pm_hold_client %p", (void *)ct));
3760                 i_mdi_pm_hold_client(ct, 1);
3761                 MDI_CLIENT_UNLOCK(ct);
3762         }
3763 
3764         return (rv);
3765 }
3766 
3767 /*
3768  * mdi_pi_standby():
3769  *              Place the mdi_pathinfo node in standby state
3770  *
3771  * Return Values:
3772  *              MDI_SUCCESS
3773  *              MDI_FAILURE
3774  */
3775 int
3776 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3777 {
3778         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3779 }
3780 
3781 /*
3782  * mdi_pi_fault():
3783  *              Place the mdi_pathinfo node in fault'ed state
3784  * Return Values:
3785  *              MDI_SUCCESS
3786  *              MDI_FAILURE
3787  */
3788 int
3789 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3790 {
3791         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3792 }
3793 
3794 /*
3795  * mdi_pi_offline():
3796  *              Offline a mdi_pathinfo node.
3797  * Return Values:
3798  *              MDI_SUCCESS
3799  *              MDI_FAILURE
3800  */
3801 int
3802 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3803 {
3804         int     ret, client_held = 0;
3805         mdi_client_t    *ct;
3806 
3807         /*
3808          * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3809          * used it to mean "user initiated operation" (i.e. devctl). Callers
3810          * should now just use NDI_USER_REQ.
3811          */
3812         if (flags & NDI_DEVI_REMOVE) {
3813                 flags &= ~NDI_DEVI_REMOVE;
3814                 flags |= NDI_USER_REQ;
3815         }
3816 
3817         ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3818 
3819         if (ret == MDI_SUCCESS) {
3820                 MDI_PI_LOCK(pip);
3821                 if (MDI_PI(pip)->pi_pm_held) {
3822                         client_held = 1;
3823                 }
3824                 MDI_PI_UNLOCK(pip);
3825 
3826                 if (client_held) {
3827                         ct = MDI_PI(pip)->pi_client;
3828                         MDI_CLIENT_LOCK(ct);
3829                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3830                             "i_mdi_pm_rele_client\n"));
3831                         i_mdi_pm_rele_client(ct, 1);
3832                         MDI_CLIENT_UNLOCK(ct);
3833                 }
3834         }
3835 
3836         return (ret);
3837 }
3838 
3839 /*
3840  * i_mdi_pi_offline():
3841  *              Offline a mdi_pathinfo node and call the vHCI driver's callback
3842  */
3843 static int
3844 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3845 {
3846         dev_info_t      *vdip = NULL;
3847         mdi_vhci_t      *vh = NULL;
3848         mdi_client_t    *ct = NULL;
3849         int             (*f)();
3850         int             rv;
3851 
3852         MDI_PI_LOCK(pip);
3853         ct = MDI_PI(pip)->pi_client;
3854         ASSERT(ct != NULL);
3855 
3856         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3857                 /*
3858                  * Give a chance for pending I/Os to complete.
3859                  */
3860                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3861                     "!%d cmds still pending on path %s %p",
3862                     MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3863                     (void *)pip));
3864                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3865                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3866                     TR_CLOCK_TICK) == -1) {
3867                         /*
3868                          * The timeout time reached without ref_cnt being zero
3869                          * being signaled.
3870                          */
3871                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3872                             "!Timeout reached on path %s %p without the cond",
3873                             mdi_pi_spathname(pip), (void *)pip));
3874                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3875                             "!%d cmds still pending on path %s %p",
3876                             MDI_PI(pip)->pi_ref_cnt,
3877                             mdi_pi_spathname(pip), (void *)pip));
3878                 }
3879         }
3880         vh = ct->ct_vhci;
3881         vdip = vh->vh_dip;
3882 
3883         /*
3884          * Notify vHCI that has registered this event
3885          */
3886         ASSERT(vh->vh_ops);
3887         f = vh->vh_ops->vo_pi_state_change;
3888 
3889         if (f != NULL) {
3890                 MDI_PI_UNLOCK(pip);
3891                 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3892                     flags)) != MDI_SUCCESS) {
3893                         MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3894                             "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3895                             ddi_driver_name(vdip), ddi_get_instance(vdip),
3896                             (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3897                 }
3898                 MDI_PI_LOCK(pip);
3899         }
3900 
3901         /*
3902          * Set the mdi_pathinfo node state and clear the transient condition
3903          */
3904         MDI_PI_SET_OFFLINE(pip);
3905         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3906         MDI_PI_UNLOCK(pip);
3907 
3908         MDI_CLIENT_LOCK(ct);
3909         if (rv == MDI_SUCCESS) {
3910                 if (ct->ct_unstable == 0) {
3911                         dev_info_t      *cdip = ct->ct_dip;
3912 
3913                         /*
3914                          * Onlining the mdi_pathinfo node will impact the
3915                          * client state Update the client and dev_info node
3916                          * state accordingly
3917                          */
3918                         i_mdi_client_update_state(ct);
3919                         rv = NDI_SUCCESS;
3920                         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3921                                 if (cdip &&
3922                                     (i_ddi_node_state(cdip) >=
3923                                     DS_INITIALIZED)) {
3924                                         MDI_CLIENT_UNLOCK(ct);
3925                                         rv = ndi_devi_offline(cdip,
3926                                             NDI_DEVFS_CLEAN);
3927                                         MDI_CLIENT_LOCK(ct);
3928                                         if (rv != NDI_SUCCESS) {
3929                                                 /*
3930                                                  * ndi_devi_offline failed.
3931                                                  * Reset client flags to
3932                                                  * online.
3933                                                  */
3934                                                 MDI_DEBUG(4, (MDI_WARN, cdip,
3935                                                     "ndi_devi_offline failed: "
3936                                                     "error %x", rv));
3937                                                 MDI_CLIENT_SET_ONLINE(ct);
3938                                         }
3939                                 }
3940                         }
3941                         /*
3942                          * Convert to MDI error code
3943                          */
3944                         switch (rv) {
3945                         case NDI_SUCCESS:
3946                                 rv = MDI_SUCCESS;
3947                                 break;
3948                         case NDI_BUSY:
3949                                 rv = MDI_BUSY;
3950                                 break;
3951                         default:
3952                                 rv = MDI_FAILURE;
3953                                 break;
3954                         }
3955                 }
3956                 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3957                 i_mdi_report_path_state(ct, pip);
3958         }
3959 
3960         MDI_CLIENT_UNLOCK(ct);
3961 
3962         /*
3963          * Change in the mdi_pathinfo node state will impact the client state
3964          */
3965         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3966             "ct = %p pip = %p", (void *)ct, (void *)pip));
3967         return (rv);
3968 }
3969 
3970 /*
3971  * i_mdi_pi_online():
3972  *              Online a mdi_pathinfo node and call the vHCI driver's callback
3973  */
3974 static int
3975 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3976 {
3977         mdi_vhci_t      *vh = NULL;
3978         mdi_client_t    *ct = NULL;
3979         mdi_phci_t      *ph;
3980         int             (*f)();
3981         int             rv;
3982 
3983         MDI_PI_LOCK(pip);
3984         ph = MDI_PI(pip)->pi_phci;
3985         vh = ph->ph_vhci;
3986         ct = MDI_PI(pip)->pi_client;
3987         MDI_PI_SET_ONLINING(pip)
3988         MDI_PI_UNLOCK(pip);
3989         f = vh->vh_ops->vo_pi_state_change;
3990         if (f != NULL)
3991                 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
3992                     flags);
3993         MDI_CLIENT_LOCK(ct);
3994         MDI_PI_LOCK(pip);
3995         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3996         MDI_PI_UNLOCK(pip);
3997         if (rv == MDI_SUCCESS) {
3998                 dev_info_t      *cdip = ct->ct_dip;
3999 
4000                 rv = MDI_SUCCESS;
4001                 i_mdi_client_update_state(ct);
4002                 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4003                     MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4004                         if (cdip && !i_ddi_devi_attached(cdip)) {
4005                                 MDI_CLIENT_UNLOCK(ct);
4006                                 rv = ndi_devi_online(cdip, 0);
4007                                 MDI_CLIENT_LOCK(ct);
4008                                 if ((rv != NDI_SUCCESS) &&
4009                                     (MDI_CLIENT_STATE(ct) ==
4010                                     MDI_CLIENT_STATE_DEGRADED)) {
4011                                         MDI_CLIENT_SET_OFFLINE(ct);
4012                                 }
4013                                 if (rv != NDI_SUCCESS) {
4014                                         /* Reset the path state */
4015                                         MDI_PI_LOCK(pip);
4016                                         MDI_PI(pip)->pi_state =
4017                                             MDI_PI_OLD_STATE(pip);
4018                                         MDI_PI_UNLOCK(pip);
4019                                 }
4020                         }
4021                 }
4022                 switch (rv) {
4023                 case NDI_SUCCESS:
4024                         MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4025                         i_mdi_report_path_state(ct, pip);
4026                         rv = MDI_SUCCESS;
4027                         break;
4028                 case NDI_BUSY:
4029                         rv = MDI_BUSY;
4030                         break;
4031                 default:
4032                         rv = MDI_FAILURE;
4033                         break;
4034                 }
4035         } else {
4036                 /* Reset the path state */
4037                 MDI_PI_LOCK(pip);
4038                 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4039                 MDI_PI_UNLOCK(pip);
4040         }
4041         MDI_CLIENT_UNLOCK(ct);
4042         return (rv);
4043 }
4044 
4045 /*
4046  * mdi_pi_get_node_name():
4047  *              Get the name associated with a mdi_pathinfo node.
4048  *              Since pathinfo nodes are not directly named, we
4049  *              return the node_name of the client.
4050  *
4051  * Return Values:
4052  *              char *
4053  */
4054 char *
4055 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4056 {
4057         mdi_client_t    *ct;
4058 
4059         if (pip == NULL)
4060                 return (NULL);
4061         ct = MDI_PI(pip)->pi_client;
4062         if ((ct == NULL) || (ct->ct_dip == NULL))
4063                 return (NULL);
4064         return (ddi_node_name(ct->ct_dip));
4065 }
4066 
4067 /*
4068  * mdi_pi_get_addr():
4069  *              Get the unit address associated with a mdi_pathinfo node
4070  *
4071  * Return Values:
4072  *              char *
4073  */
4074 char *
4075 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4076 {
4077         if (pip == NULL)
4078                 return (NULL);
4079 
4080         return (MDI_PI(pip)->pi_addr);
4081 }
4082 
4083 /*
4084  * mdi_pi_get_path_instance():
4085  *              Get the 'path_instance' of a mdi_pathinfo node
4086  *
4087  * Return Values:
4088  *              path_instance
4089  */
4090 int
4091 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4092 {
4093         if (pip == NULL)
4094                 return (0);
4095 
4096         return (MDI_PI(pip)->pi_path_instance);
4097 }
4098 
4099 /*
4100  * mdi_pi_pathname():
4101  *              Return pointer to path to pathinfo node.
4102  */
4103 char *
4104 mdi_pi_pathname(mdi_pathinfo_t *pip)
4105 {
4106         if (pip == NULL)
4107                 return (NULL);
4108         return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4109 }
4110 
4111 /*
4112  * mdi_pi_spathname():
4113  *              Return pointer to shortpath to pathinfo node. Used for debug
4114  *              messages, so return "" instead of NULL when unknown.
4115  */
4116 char *
4117 mdi_pi_spathname(mdi_pathinfo_t *pip)
4118 {
4119         char    *spath = "";
4120 
4121         if (pip) {
4122                 spath = mdi_pi_spathname_by_instance(
4123                     mdi_pi_get_path_instance(pip));
4124                 if (spath == NULL)
4125                         spath = "";
4126         }
4127         return (spath);
4128 }
4129 
4130 char *
4131 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4132 {
4133         char *obp_path = NULL;
4134         if ((pip == NULL) || (path == NULL))
4135                 return (NULL);
4136 
4137         if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4138                 (void) strcpy(path, obp_path);
4139                 (void) mdi_prop_free(obp_path);
4140         } else {
4141                 path = NULL;
4142         }
4143         return (path);
4144 }
4145 
4146 int
4147 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4148 {
4149         dev_info_t *pdip;
4150         char *obp_path = NULL;
4151         int rc = MDI_FAILURE;
4152 
4153         if (pip == NULL)
4154                 return (MDI_FAILURE);
4155 
4156         pdip = mdi_pi_get_phci(pip);
4157         if (pdip == NULL)
4158                 return (MDI_FAILURE);
4159 
4160         obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4161 
4162         if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4163                 (void) ddi_pathname(pdip, obp_path);
4164         }
4165 
4166         if (component) {
4167                 (void) strncat(obp_path, "/", MAXPATHLEN);
4168                 (void) strncat(obp_path, component, MAXPATHLEN);
4169         }
4170         rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4171 
4172         if (obp_path)
4173                 kmem_free(obp_path, MAXPATHLEN);
4174         return (rc);
4175 }
4176 
4177 /*
4178  * mdi_pi_get_client():
4179  *              Get the client devinfo associated with a mdi_pathinfo node
4180  *
4181  * Return Values:
4182  *              Handle to client device dev_info node
4183  */
4184 dev_info_t *
4185 mdi_pi_get_client(mdi_pathinfo_t *pip)
4186 {
4187         dev_info_t      *dip = NULL;
4188         if (pip) {
4189                 dip = MDI_PI(pip)->pi_client->ct_dip;
4190         }
4191         return (dip);
4192 }
4193 
4194 /*
4195  * mdi_pi_get_phci():
4196  *              Get the pHCI devinfo associated with the mdi_pathinfo node
4197  * Return Values:
4198  *              Handle to dev_info node
4199  */
4200 dev_info_t *
4201 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4202 {
4203         dev_info_t      *dip = NULL;
4204         mdi_phci_t      *ph;
4205 
4206         if (pip) {
4207                 ph = MDI_PI(pip)->pi_phci;
4208                 if (ph)
4209                         dip = ph->ph_dip;
4210         }
4211         return (dip);
4212 }
4213 
4214 /*
4215  * mdi_pi_get_client_private():
4216  *              Get the client private information associated with the
4217  *              mdi_pathinfo node
4218  */
4219 void *
4220 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4221 {
4222         void *cprivate = NULL;
4223         if (pip) {
4224                 cprivate = MDI_PI(pip)->pi_cprivate;
4225         }
4226         return (cprivate);
4227 }
4228 
4229 /*
4230  * mdi_pi_set_client_private():
4231  *              Set the client private information in the mdi_pathinfo node
4232  */
4233 void
4234 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4235 {
4236         if (pip) {
4237                 MDI_PI(pip)->pi_cprivate = priv;
4238         }
4239 }
4240 
4241 /*
4242  * mdi_pi_get_phci_private():
4243  *              Get the pHCI private information associated with the
4244  *              mdi_pathinfo node
4245  */
4246 caddr_t
4247 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4248 {
4249         caddr_t pprivate = NULL;
4250 
4251         if (pip) {
4252                 pprivate = MDI_PI(pip)->pi_pprivate;
4253         }
4254         return (pprivate);
4255 }
4256 
4257 /*
4258  * mdi_pi_set_phci_private():
4259  *              Set the pHCI private information in the mdi_pathinfo node
4260  */
4261 void
4262 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4263 {
4264         if (pip) {
4265                 MDI_PI(pip)->pi_pprivate = priv;
4266         }
4267 }
4268 
4269 /*
4270  * mdi_pi_get_state():
4271  *              Get the mdi_pathinfo node state. Transient states are internal
4272  *              and not provided to the users
4273  */
4274 mdi_pathinfo_state_t
4275 mdi_pi_get_state(mdi_pathinfo_t *pip)
4276 {
4277         mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4278 
4279         if (pip) {
4280                 if (MDI_PI_IS_TRANSIENT(pip)) {
4281                         /*
4282                          * mdi_pathinfo is in state transition.  Return the
4283                          * last good state.
4284                          */
4285                         state = MDI_PI_OLD_STATE(pip);
4286                 } else {
4287                         state = MDI_PI_STATE(pip);
4288                 }
4289         }
4290         return (state);
4291 }
4292 
4293 /*
4294  * mdi_pi_get_flags():
4295  *              Get the mdi_pathinfo node flags.
4296  */
4297 uint_t
4298 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4299 {
4300         return (pip ? MDI_PI(pip)->pi_flags : 0);
4301 }
4302 
4303 /*
4304  * Note that the following function needs to be the new interface for
4305  * mdi_pi_get_state when mpxio gets integrated to ON.
4306  */
4307 int
4308 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4309                 uint32_t *ext_state)
4310 {
4311         *state = MDI_PATHINFO_STATE_INIT;
4312 
4313         if (pip) {
4314                 if (MDI_PI_IS_TRANSIENT(pip)) {
4315                         /*
4316                          * mdi_pathinfo is in state transition.  Return the
4317                          * last good state.
4318                          */
4319                         *state = MDI_PI_OLD_STATE(pip);
4320                         *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4321                 } else {
4322                         *state = MDI_PI_STATE(pip);
4323                         *ext_state = MDI_PI_EXT_STATE(pip);
4324                 }
4325         }
4326         return (MDI_SUCCESS);
4327 }
4328 
4329 /*
4330  * mdi_pi_get_preferred:
4331  *      Get the preferred path flag
4332  */
4333 int
4334 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4335 {
4336         if (pip) {
4337                 return (MDI_PI(pip)->pi_preferred);
4338         }
4339         return (0);
4340 }
4341 
4342 /*
4343  * mdi_pi_set_preferred:
4344  *      Set the preferred path flag
4345  */
4346 void
4347 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4348 {
4349         if (pip) {
4350                 MDI_PI(pip)->pi_preferred = preferred;
4351         }
4352 }
4353 
4354 /*
4355  * mdi_pi_set_state():
4356  *              Set the mdi_pathinfo node state
4357  */
4358 void
4359 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4360 {
4361         uint32_t        ext_state;
4362 
4363         if (pip) {
4364                 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4365                 MDI_PI(pip)->pi_state = state;
4366                 MDI_PI(pip)->pi_state |= ext_state;
4367 
4368                 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4369                 i_ddi_di_cache_invalidate();
4370         }
4371 }
4372 
4373 /*
4374  * Property functions:
4375  */
4376 int
4377 i_map_nvlist_error_to_mdi(int val)
4378 {
4379         int rv;
4380 
4381         switch (val) {
4382         case 0:
4383                 rv = DDI_PROP_SUCCESS;
4384                 break;
4385         case EINVAL:
4386         case ENOTSUP:
4387                 rv = DDI_PROP_INVAL_ARG;
4388                 break;
4389         case ENOMEM:
4390                 rv = DDI_PROP_NO_MEMORY;
4391                 break;
4392         default:
4393                 rv = DDI_PROP_NOT_FOUND;
4394                 break;
4395         }
4396         return (rv);
4397 }
4398 
4399 /*
4400  * mdi_pi_get_next_prop():
4401  *              Property walk function.  The caller should hold mdi_pi_lock()
4402  *              and release by calling mdi_pi_unlock() at the end of walk to
4403  *              get a consistent value.
4404  */
4405 nvpair_t *
4406 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4407 {
4408         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4409                 return (NULL);
4410         }
4411         ASSERT(MDI_PI_LOCKED(pip));
4412         return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4413 }
4414 
4415 /*
4416  * mdi_prop_remove():
4417  *              Remove the named property from the named list.
4418  */
4419 int
4420 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4421 {
4422         if (pip == NULL) {
4423                 return (DDI_PROP_NOT_FOUND);
4424         }
4425         ASSERT(!MDI_PI_LOCKED(pip));
4426         MDI_PI_LOCK(pip);
4427         if (MDI_PI(pip)->pi_prop == NULL) {
4428                 MDI_PI_UNLOCK(pip);
4429                 return (DDI_PROP_NOT_FOUND);
4430         }
4431         if (name) {
4432                 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4433         } else {
4434                 char            nvp_name[MAXNAMELEN];
4435                 nvpair_t        *nvp;
4436                 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4437                 while (nvp) {
4438                         nvpair_t        *next;
4439                         next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4440                         (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4441                             nvpair_name(nvp));
4442                         (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4443                             nvp_name);
4444                         nvp = next;
4445                 }
4446         }
4447         MDI_PI_UNLOCK(pip);
4448         return (DDI_PROP_SUCCESS);
4449 }
4450 
4451 /*
4452  * mdi_prop_size():
4453  *              Get buffer size needed to pack the property data.
4454  *              Caller should hold the mdi_pathinfo_t lock to get a consistent
4455  *              buffer size.
4456  */
4457 int
4458 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4459 {
4460         int     rv;
4461         size_t  bufsize;
4462 
4463         *buflenp = 0;
4464         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4465                 return (DDI_PROP_NOT_FOUND);
4466         }
4467         ASSERT(MDI_PI_LOCKED(pip));
4468         rv = nvlist_size(MDI_PI(pip)->pi_prop,
4469             &bufsize, NV_ENCODE_NATIVE);
4470         *buflenp = bufsize;
4471         return (i_map_nvlist_error_to_mdi(rv));
4472 }
4473 
4474 /*
4475  * mdi_prop_pack():
4476  *              pack the property list.  The caller should hold the
4477  *              mdi_pathinfo_t node to get a consistent data
4478  */
4479 int
4480 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4481 {
4482         int     rv;
4483         size_t  bufsize;
4484 
4485         if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4486                 return (DDI_PROP_NOT_FOUND);
4487         }
4488 
4489         ASSERT(MDI_PI_LOCKED(pip));
4490 
4491         bufsize = buflen;
4492         rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4493             NV_ENCODE_NATIVE, KM_SLEEP);
4494 
4495         return (i_map_nvlist_error_to_mdi(rv));
4496 }
4497 
4498 /*
4499  * mdi_prop_update_byte():
4500  *              Create/Update a byte property
4501  */
4502 int
4503 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4504 {
4505         int rv;
4506 
4507         if (pip == NULL) {
4508                 return (DDI_PROP_INVAL_ARG);
4509         }
4510         ASSERT(!MDI_PI_LOCKED(pip));
4511         MDI_PI_LOCK(pip);
4512         if (MDI_PI(pip)->pi_prop == NULL) {
4513                 MDI_PI_UNLOCK(pip);
4514                 return (DDI_PROP_NOT_FOUND);
4515         }
4516         rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4517         MDI_PI_UNLOCK(pip);
4518         return (i_map_nvlist_error_to_mdi(rv));
4519 }
4520 
4521 /*
4522  * mdi_prop_update_byte_array():
4523  *              Create/Update a byte array property
4524  */
4525 int
4526 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4527     uint_t nelements)
4528 {
4529         int rv;
4530 
4531         if (pip == NULL) {
4532                 return (DDI_PROP_INVAL_ARG);
4533         }
4534         ASSERT(!MDI_PI_LOCKED(pip));
4535         MDI_PI_LOCK(pip);
4536         if (MDI_PI(pip)->pi_prop == NULL) {
4537                 MDI_PI_UNLOCK(pip);
4538                 return (DDI_PROP_NOT_FOUND);
4539         }
4540         rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4541         MDI_PI_UNLOCK(pip);
4542         return (i_map_nvlist_error_to_mdi(rv));
4543 }
4544 
4545 /*
4546  * mdi_prop_update_int():
4547  *              Create/Update a 32 bit integer property
4548  */
4549 int
4550 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4551 {
4552         int rv;
4553 
4554         if (pip == NULL) {
4555                 return (DDI_PROP_INVAL_ARG);
4556         }
4557         ASSERT(!MDI_PI_LOCKED(pip));
4558         MDI_PI_LOCK(pip);
4559         if (MDI_PI(pip)->pi_prop == NULL) {
4560                 MDI_PI_UNLOCK(pip);
4561                 return (DDI_PROP_NOT_FOUND);
4562         }
4563         rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4564         MDI_PI_UNLOCK(pip);
4565         return (i_map_nvlist_error_to_mdi(rv));
4566 }
4567 
4568 /*
4569  * mdi_prop_update_int64():
4570  *              Create/Update a 64 bit integer property
4571  */
4572 int
4573 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4574 {
4575         int rv;
4576 
4577         if (pip == NULL) {
4578                 return (DDI_PROP_INVAL_ARG);
4579         }
4580         ASSERT(!MDI_PI_LOCKED(pip));
4581         MDI_PI_LOCK(pip);
4582         if (MDI_PI(pip)->pi_prop == NULL) {
4583                 MDI_PI_UNLOCK(pip);
4584                 return (DDI_PROP_NOT_FOUND);
4585         }
4586         rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4587         MDI_PI_UNLOCK(pip);
4588         return (i_map_nvlist_error_to_mdi(rv));
4589 }
4590 
4591 /*
4592  * mdi_prop_update_int_array():
4593  *              Create/Update a int array property
4594  */
4595 int
4596 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4597             uint_t nelements)
4598 {
4599         int rv;
4600 
4601         if (pip == NULL) {
4602                 return (DDI_PROP_INVAL_ARG);
4603         }
4604         ASSERT(!MDI_PI_LOCKED(pip));
4605         MDI_PI_LOCK(pip);
4606         if (MDI_PI(pip)->pi_prop == NULL) {
4607                 MDI_PI_UNLOCK(pip);
4608                 return (DDI_PROP_NOT_FOUND);
4609         }
4610         rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4611             nelements);
4612         MDI_PI_UNLOCK(pip);
4613         return (i_map_nvlist_error_to_mdi(rv));
4614 }
4615 
4616 /*
4617  * mdi_prop_update_string():
4618  *              Create/Update a string property
4619  */
4620 int
4621 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4622 {
4623         int rv;
4624 
4625         if (pip == NULL) {
4626                 return (DDI_PROP_INVAL_ARG);
4627         }
4628         ASSERT(!MDI_PI_LOCKED(pip));
4629         MDI_PI_LOCK(pip);
4630         if (MDI_PI(pip)->pi_prop == NULL) {
4631                 MDI_PI_UNLOCK(pip);
4632                 return (DDI_PROP_NOT_FOUND);
4633         }
4634         rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4635         MDI_PI_UNLOCK(pip);
4636         return (i_map_nvlist_error_to_mdi(rv));
4637 }
4638 
4639 /*
4640  * mdi_prop_update_string_array():
4641  *              Create/Update a string array property
4642  */
4643 int
4644 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4645     uint_t nelements)
4646 {
4647         int rv;
4648 
4649         if (pip == NULL) {
4650                 return (DDI_PROP_INVAL_ARG);
4651         }
4652         ASSERT(!MDI_PI_LOCKED(pip));
4653         MDI_PI_LOCK(pip);
4654         if (MDI_PI(pip)->pi_prop == NULL) {
4655                 MDI_PI_UNLOCK(pip);
4656                 return (DDI_PROP_NOT_FOUND);
4657         }
4658         rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4659             nelements);
4660         MDI_PI_UNLOCK(pip);
4661         return (i_map_nvlist_error_to_mdi(rv));
4662 }
4663 
4664 /*
4665  * mdi_prop_lookup_byte():
4666  *              Look for byte property identified by name.  The data returned
4667  *              is the actual property and valid as long as mdi_pathinfo_t node
4668  *              is alive.
4669  */
4670 int
4671 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4672 {
4673         int rv;
4674 
4675         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4676                 return (DDI_PROP_NOT_FOUND);
4677         }
4678         rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4679         return (i_map_nvlist_error_to_mdi(rv));
4680 }
4681 
4682 
4683 /*
4684  * mdi_prop_lookup_byte_array():
4685  *              Look for byte array property identified by name.  The data
4686  *              returned is the actual property and valid as long as
4687  *              mdi_pathinfo_t node is alive.
4688  */
4689 int
4690 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4691     uint_t *nelements)
4692 {
4693         int rv;
4694 
4695         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4696                 return (DDI_PROP_NOT_FOUND);
4697         }
4698         rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4699             nelements);
4700         return (i_map_nvlist_error_to_mdi(rv));
4701 }
4702 
4703 /*
4704  * mdi_prop_lookup_int():
4705  *              Look for int property identified by name.  The data returned
4706  *              is the actual property and valid as long as mdi_pathinfo_t
4707  *              node is alive.
4708  */
4709 int
4710 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4711 {
4712         int rv;
4713 
4714         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4715                 return (DDI_PROP_NOT_FOUND);
4716         }
4717         rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4718         return (i_map_nvlist_error_to_mdi(rv));
4719 }
4720 
4721 /*
4722  * mdi_prop_lookup_int64():
4723  *              Look for int64 property identified by name.  The data returned
4724  *              is the actual property and valid as long as mdi_pathinfo_t node
4725  *              is alive.
4726  */
4727 int
4728 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4729 {
4730         int rv;
4731         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4732                 return (DDI_PROP_NOT_FOUND);
4733         }
4734         rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4735         return (i_map_nvlist_error_to_mdi(rv));
4736 }
4737 
4738 /*
4739  * mdi_prop_lookup_int_array():
4740  *              Look for int array property identified by name.  The data
4741  *              returned is the actual property and valid as long as
4742  *              mdi_pathinfo_t node is alive.
4743  */
4744 int
4745 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4746     uint_t *nelements)
4747 {
4748         int rv;
4749 
4750         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4751                 return (DDI_PROP_NOT_FOUND);
4752         }
4753         rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4754             (int32_t **)data, nelements);
4755         return (i_map_nvlist_error_to_mdi(rv));
4756 }
4757 
4758 /*
4759  * mdi_prop_lookup_string():
4760  *              Look for string property identified by name.  The data
4761  *              returned is the actual property and valid as long as
4762  *              mdi_pathinfo_t node is alive.
4763  */
4764 int
4765 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4766 {
4767         int rv;
4768 
4769         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4770                 return (DDI_PROP_NOT_FOUND);
4771         }
4772         rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4773         return (i_map_nvlist_error_to_mdi(rv));
4774 }
4775 
4776 /*
4777  * mdi_prop_lookup_string_array():
4778  *              Look for string array property identified by name.  The data
4779  *              returned is the actual property and valid as long as
4780  *              mdi_pathinfo_t node is alive.
4781  */
4782 int
4783 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4784     uint_t *nelements)
4785 {
4786         int rv;
4787 
4788         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4789                 return (DDI_PROP_NOT_FOUND);
4790         }
4791         rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4792             nelements);
4793         return (i_map_nvlist_error_to_mdi(rv));
4794 }
4795 
4796 /*
4797  * mdi_prop_free():
4798  *              Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4799  *              functions return the pointer to actual property data and not a
4800  *              copy of it.  So the data returned is valid as long as
4801  *              mdi_pathinfo_t node is valid.
4802  */
4803 /*ARGSUSED*/
4804 int
4805 mdi_prop_free(void *data)
4806 {
4807         return (DDI_PROP_SUCCESS);
4808 }
4809 
4810 /*ARGSUSED*/
4811 static void
4812 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4813 {
4814         char            *ct_path;
4815         char            *ct_status;
4816         char            *status;
4817         dev_info_t      *cdip = ct->ct_dip;
4818         char            lb_buf[64];
4819         int             report_lb_c = 0, report_lb_p = 0;
4820 
4821         ASSERT(MDI_CLIENT_LOCKED(ct));
4822         if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4823             (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4824                 return;
4825         }
4826         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4827                 ct_status = "optimal";
4828                 report_lb_c = 1;
4829         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4830                 ct_status = "degraded";
4831         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4832                 ct_status = "failed";
4833         } else {
4834                 ct_status = "unknown";
4835         }
4836 
4837         lb_buf[0] = 0;          /* not interested in load balancing config */
4838 
4839         if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4840                 status = "removed";
4841         } else if (MDI_PI_IS_OFFLINE(pip)) {
4842                 status = "offline";
4843         } else if (MDI_PI_IS_ONLINE(pip)) {
4844                 status = "online";
4845                 report_lb_p = 1;
4846         } else if (MDI_PI_IS_STANDBY(pip)) {
4847                 status = "standby";
4848         } else if (MDI_PI_IS_FAULT(pip)) {
4849                 status = "faulted";
4850         } else {
4851                 status = "unknown";
4852         }
4853 
4854         if (cdip != NULL) {
4855                 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4856 
4857                 if (report_lb_c && report_lb_p) {
4858                         if (ct->ct_lb == LOAD_BALANCE_LBA) {
4859                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4860                                     "%s, region-size: %d", mdi_load_balance_lba,
4861                                     ct->ct_lb_args->region_size);
4862                         } else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4863                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4864                                     "%s", mdi_load_balance_none);
4865                         } else {
4866                                 (void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4867                                     mdi_load_balance_rr);
4868                         }
4869 
4870                         dev_err(cdip, CE_CONT, "!multipath status: %s: "
4871                             "path %d %s is %s; load balancing: %s\n",
4872                             ct_status, mdi_pi_get_path_instance(pip),
4873                             mdi_pi_spathname(pip), status, lb_buf);
4874                 } else {
4875                         dev_err(cdip, CE_CONT,
4876                             "!multipath status: %s: path %d %s is %s\n",
4877                             ct_status, mdi_pi_get_path_instance(pip),
4878                             mdi_pi_spathname(pip), status);
4879                 }
4880 
4881                 kmem_free(ct_path, MAXPATHLEN);
4882                 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4883         }
4884 }
4885 
#ifdef  DEBUG
/*
 * i_mdi_log():
 *              Utility function for error message management
 *
 *              The message is formatted into a local buffer; a leading
 *              '!', '?' or '^' in the formatted text selects log-only,
 *              boot-only or console-only delivery and is stripped from the
 *              text.  The mdi_debug_logonly and mdi_debug_consoleonly
 *              tunables override that selection.
 *
 *              NOTE: Implementation takes care of trailing \n for cmn_err,
 *              MDI_DEBUG should not terminate fmt strings with \n.
 *
 *              NOTE: If the level is >= 2, and there is no leading !?^
 *              then a leading ! is implied (but can be overridden via
 *              mdi_debug_consoleonly). If you are using kmdb on the console,
 *              consider setting mdi_debug_consoleonly to 1 as an aid.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
        char            name[MAXNAMELEN];
        char            buf[512];
        char            *text = buf;
        char            dest = 0;       /* 0, '!', '?' or '^' */
        va_list         ap;

        name[0] = 0;
        if (dip) {
                (void) snprintf(name, sizeof (name), "%s%d: ",
                    ddi_driver_name(dip), ddi_get_instance(dip));
        }

        va_start(ap, fmt);
        (void) vsnprintf(buf, sizeof (buf), fmt, ap);
        va_end(ap);

        /* Pick up an explicit destination prefix, or imply '!'. */
        if (buf[0] == '!' || buf[0] == '?' || buf[0] == '^') {
                dest = buf[0];
                text = &buf[1];
        } else if (level >= 2) {
                dest = '!';             /* ! implied */
        }

        if (mdi_debug_logonly)
                dest = '!';

        /* Console-only override: always a CE_NOTE sent to the console. */
        if (mdi_debug_consoleonly) {
                cmn_err(CE_NOTE, "^mdi: %s%s: %s", name, func, text);
                return;
        }

        if (level == CE_NOTE)
                level = CE_CONT;

        if (level == CE_CONT) {
                /* This level gets an explicit trailing newline. */
                switch (dest) {
                case '?':
                        cmn_err(level, "?mdi: %s%s: %s\n", name, func, text);
                        break;
                case '^':
                        cmn_err(level, "^mdi: %s%s: %s\n", name, func, text);
                        break;
                case '!':
                        cmn_err(level, "!mdi: %s%s: %s\n", name, func, text);
                        break;
                default:
                        cmn_err(level, "mdi: %s%s: %s\n", name, func, text);
                        break;
                }
        } else if (level == CE_WARN || level == CE_PANIC) {
                switch (dest) {
                case '?':
                        cmn_err(level, "?mdi: %s%s: %s", name, func, text);
                        break;
                case '^':
                        cmn_err(level, "^mdi: %s%s: %s", name, func, text);
                        break;
                case '!':
                        cmn_err(level, "!mdi: %s%s: %s", name, func, text);
                        break;
                default:
                        cmn_err(level, "mdi: %s%s: %s", name, func, text);
                        break;
                }
        } else {
                cmn_err(level, "mdi: %s%s", name, text);
        }
}
#endif  /* DEBUG */
4989 
4990 void
4991 i_mdi_client_online(dev_info_t *ct_dip)
4992 {
4993         mdi_client_t    *ct;
4994 
4995         /*
4996          * Client online notification. Mark client state as online
4997          * restore our binding with dev_info node
4998          */
4999         ct = i_devi_get_client(ct_dip);
5000         ASSERT(ct != NULL);
5001         MDI_CLIENT_LOCK(ct);
5002         MDI_CLIENT_SET_ONLINE(ct);
5003         /* catch for any memory leaks */
5004         ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5005         ct->ct_dip = ct_dip;
5006 
5007         if (ct->ct_power_cnt == 0)
5008                 (void) i_mdi_power_all_phci(ct);
5009 
5010         MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5011             "i_mdi_pm_hold_client %p", (void *)ct));
5012         i_mdi_pm_hold_client(ct, 1);
5013 
5014         MDI_CLIENT_UNLOCK(ct);
5015 }
5016 
5017 void
5018 i_mdi_phci_online(dev_info_t *ph_dip)
5019 {
5020         mdi_phci_t      *ph;
5021 
5022         /* pHCI online notification. Mark state accordingly */
5023         ph = i_devi_get_phci(ph_dip);
5024         ASSERT(ph != NULL);
5025         MDI_PHCI_LOCK(ph);
5026         MDI_PHCI_SET_ONLINE(ph);
5027         MDI_PHCI_UNLOCK(ph);
5028 }
5029 
5030 /*
5031  * mdi_devi_online():
5032  *              Online notification from NDI framework on pHCI/client
5033  *              device online.
5034  * Return Values:
5035  *              NDI_SUCCESS
5036  *              MDI_FAILURE
5037  */
5038 /*ARGSUSED*/
5039 int
5040 mdi_devi_online(dev_info_t *dip, uint_t flags)
5041 {
5042         if (MDI_PHCI(dip)) {
5043                 i_mdi_phci_online(dip);
5044         }
5045 
5046         if (MDI_CLIENT(dip)) {
5047                 i_mdi_client_online(dip);
5048         }
5049         return (NDI_SUCCESS);
5050 }
5051 
5052 /*
5053  * mdi_devi_offline():
5054  *              Offline notification from NDI framework on pHCI/Client device
5055  *              offline.
5056  *
5057  * Return Values:
5058  *              NDI_SUCCESS
5059  *              NDI_FAILURE
5060  */
5061 /*ARGSUSED*/
5062 int
5063 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5064 {
5065         int             rv = NDI_SUCCESS;
5066 
5067         if (MDI_CLIENT(dip)) {
5068                 rv = i_mdi_client_offline(dip, flags);
5069                 if (rv != NDI_SUCCESS)
5070                         return (rv);
5071         }
5072 
5073         if (MDI_PHCI(dip)) {
5074                 rv = i_mdi_phci_offline(dip, flags);
5075 
5076                 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5077                         /* set client back online */
5078                         i_mdi_client_online(dip);
5079                 }
5080         }
5081 
5082         return (rv);
5083 }
5084 
/*
 * i_mdi_phci_offline():
 *              Offline the pHCI component bound to dip together with all
 *              of the mdi_pathinfo nodes hanging off it.
 *
 *              The work is done in three passes over the pHCI's path list:
 *              1. Verify that no client is unstable or failing over, and
 *                 offline (ndi_devi_offline) every client for which
 *                 i_mdi_client_compute_state() reports that it would be
 *                 left in the FAILED state.  If one of those offlines
 *                 fails, the paths visited so far are walked again and
 *                 their clients are onlined/offlined according to their
 *                 current state before NDI_BUSY is returned.
 *              2. Mark the pHCI offline and every path as offlining, then
 *                 drop the pHCI lock and delay to let pending commands run.
 *              3. Offline each path via i_mdi_pi_offline().
 *
 * Return Values:
 *              NDI_SUCCESS     dip has no pHCI state, the pHCI was already
 *                              offline, or every path was offlined
 *              NDI_BUSY        the pHCI or one of its clients is in a
 *                              transient state, a client could not be
 *                              offlined, or a path did not go offline
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
        int             rv = NDI_SUCCESS;
        mdi_phci_t      *ph;
        mdi_client_t    *ct;
        mdi_pathinfo_t  *pip;
        mdi_pathinfo_t  *next;
        mdi_pathinfo_t  *failed_pip = NULL;
        dev_info_t      *cdip;

        /*
         * pHCI component offline notification
         * Make sure that this pHCI instance is free to be offlined.
         * If it is OK to proceed, Offline and remove all the child
         * mdi_pathinfo nodes.  This process automatically offlines
         * corresponding client devices, for which this pHCI provides
         * critical services.
         */
        ph = i_devi_get_phci(dip);
        MDI_DEBUG(2, (MDI_NOTE, dip,
            "called %p %p", (void *)dip, (void *)ph));
        if (ph == NULL) {
                /* Nothing registered for this dip: treated as success */
                return (rv);
        }

        MDI_PHCI_LOCK(ph);

        if (MDI_PHCI_IS_OFFLINE(ph)) {
                MDI_DEBUG(1, (MDI_WARN, dip,
                    "!pHCI already offlined: %p", (void *)dip));
                MDI_PHCI_UNLOCK(ph);
                return (NDI_SUCCESS);
        }

        /*
         * Check to see if the pHCI can be offlined
         */
        if (ph->ph_unstable) {
                MDI_DEBUG(1, (MDI_WARN, dip,
                    "!One or more target devices are in transient state. "
                    "This device can not be removed at this moment. "
                    "Please try again later."));
                MDI_PHCI_UNLOCK(ph);
                return (NDI_BUSY);
        }

        /*
         * Pass 1: check every path's client and offline the clients that
         * would lose their last path.
         */
        pip = ph->ph_path_head;
        while (pip != NULL) {
                MDI_PI_LOCK(pip);
                /* Capture the successor now; locks are dropped further down */
                next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

                /*
                 * The mdi_pathinfo state is OK. Check the client state.
                 * If failover in progress fail the pHCI from offlining
                 */
                ct = MDI_PI(pip)->pi_client;
                i_mdi_client_lock(ct, pip);
                if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
                    (ct->ct_unstable)) {
                        /*
                         * Failover is in progress, Fail the DR
                         */
                        MDI_DEBUG(1, (MDI_WARN, dip,
                            "!pHCI device is busy. "
                            "This device can not be removed at this moment. "
                            "Please try again later."));
                        MDI_PI_UNLOCK(pip);
                        i_mdi_client_unlock(ct);
                        MDI_PHCI_UNLOCK(ph);
                        return (NDI_BUSY);
                }
                MDI_PI_UNLOCK(pip);

                /*
                 * Check to see if we are removing the last path of this
                 * client device...
                 */
                cdip = ct->ct_dip;
                if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
                    (i_mdi_client_compute_state(ct, ph) ==
                    MDI_CLIENT_STATE_FAILED)) {
                        /*
                         * Both the client and the pHCI lock are released
                         * around the ndi_devi_offline() call and the pHCI
                         * lock is retaken afterwards on either outcome.
                         */
                        i_mdi_client_unlock(ct);
                        MDI_PHCI_UNLOCK(ph);
                        if (ndi_devi_offline(cdip,
                            NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
                                /*
                                 * ndi_devi_offline() failed.
                                 * This pHCI provides the critical path
                                 * to one or more client devices.
                                 * Return busy.
                                 */
                                MDI_PHCI_LOCK(ph);
                                MDI_DEBUG(1, (MDI_WARN, dip,
                                    "!pHCI device is busy. "
                                    "This device can not be removed at this "
                                    "moment. Please try again later."));
                                /* Remember where to stop the rollback walk */
                                failed_pip = pip;
                                break;
                        } else {
                                MDI_PHCI_LOCK(ph);
                                pip = next;
                        }
                } else {
                        i_mdi_client_unlock(ct);
                        pip = next;
                }
        }

        /*
         * Rollback: a client offline failed.  Revisit the paths that
         * precede failed_pip and bring each bound client in line with its
         * current state: online it when OPTIMAL/DEGRADED, offline it when
         * FAILED.  The pHCI lock is dropped around each NDI call.
         */
        if (failed_pip) {
                pip = ph->ph_path_head;
                while (pip != failed_pip) {
                        MDI_PI_LOCK(pip);
                        next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
                        ct = MDI_PI(pip)->pi_client;
                        i_mdi_client_lock(ct, pip);
                        cdip = ct->ct_dip;
                        switch (MDI_CLIENT_STATE(ct)) {
                        case MDI_CLIENT_STATE_OPTIMAL:
                        case MDI_CLIENT_STATE_DEGRADED:
                                if (cdip) {
                                        MDI_PI_UNLOCK(pip);
                                        i_mdi_client_unlock(ct);
                                        MDI_PHCI_UNLOCK(ph);
                                        (void) ndi_devi_online(cdip, 0);
                                        MDI_PHCI_LOCK(ph);
                                        pip = next;
                                        continue;
                                }
                                break;

                        case MDI_CLIENT_STATE_FAILED:
                                if (cdip) {
                                        MDI_PI_UNLOCK(pip);
                                        i_mdi_client_unlock(ct);
                                        MDI_PHCI_UNLOCK(ph);
                                        (void) ndi_devi_offline(cdip,
                                                NDI_DEVFS_CLEAN);
                                        MDI_PHCI_LOCK(ph);
                                        pip = next;
                                        continue;
                                }
                                break;
                        }
                        /* No client dip (or other state): just move on */
                        MDI_PI_UNLOCK(pip);
                        i_mdi_client_unlock(ct);
                        pip = next;
                }
                MDI_PHCI_UNLOCK(ph);
                return (NDI_BUSY);
        }

        /*
         * Mark the pHCI as offline
         */
        MDI_PHCI_SET_OFFLINE(ph);

        /*
         * Mark the child mdi_pathinfo nodes as transient
         */
        pip = ph->ph_path_head;
        while (pip != NULL) {
                MDI_PI_LOCK(pip);
                next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
                MDI_PI_SET_OFFLINING(pip);
                MDI_PI_UNLOCK(pip);
                pip = next;
        }
        MDI_PHCI_UNLOCK(ph);
        /*
         * Give a chance for any pending commands to execute
         */
        delay_random(mdi_delay);
        MDI_PHCI_LOCK(ph);
        /*
         * Pass 3: offline each path; give up with NDI_BUSY on the first
         * path that is not offline afterwards.
         */
        pip = ph->ph_path_head;
        while (pip != NULL) {
                next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
                (void) i_mdi_pi_offline(pip, flags);
                MDI_PI_LOCK(pip);
                /* NOTE(review): ct is assigned here but not used below */
                ct = MDI_PI(pip)->pi_client;
                if (!MDI_PI_IS_OFFLINE(pip)) {
                        MDI_DEBUG(1, (MDI_WARN, dip,
                            "!pHCI device is busy. "
                            "This device can not be removed at this moment. "
                            "Please try again later."));
                        MDI_PI_UNLOCK(pip);
                        /*
                         * Put the pHCI back online.
                         * NOTE(review): the OFFLINING marks set on the
                         * paths above are not undone here - confirm that
                         * this is handled elsewhere.
                         */
                        MDI_PHCI_SET_ONLINE(ph);
                        MDI_PHCI_UNLOCK(ph);
                        return (NDI_BUSY);
                }
                MDI_PI_UNLOCK(pip);
                pip = next;
        }
        MDI_PHCI_UNLOCK(ph);

        return (rv);
}
5283 
5284 void
5285 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5286 {
5287         mdi_phci_t      *ph;
5288         mdi_client_t    *ct;
5289         mdi_pathinfo_t  *pip;
5290         mdi_pathinfo_t  *next;
5291         dev_info_t      *cdip;
5292 
5293         if (!MDI_PHCI(dip))
5294                 return;
5295 
5296         ph = i_devi_get_phci(dip);
5297         if (ph == NULL) {
5298                 return;
5299         }
5300 
5301         MDI_PHCI_LOCK(ph);
5302 
5303         if (MDI_PHCI_IS_OFFLINE(ph)) {
5304                 /* has no last path */
5305                 MDI_PHCI_UNLOCK(ph);
5306                 return;
5307         }
5308 
5309         pip = ph->ph_path_head;
5310         while (pip != NULL) {
5311                 MDI_PI_LOCK(pip);
5312                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5313 
5314                 ct = MDI_PI(pip)->pi_client;
5315                 i_mdi_client_lock(ct, pip);
5316                 MDI_PI_UNLOCK(pip);
5317 
5318                 cdip = ct->ct_dip;
5319                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5320                     (i_mdi_client_compute_state(ct, ph) ==
5321                     MDI_CLIENT_STATE_FAILED)) {
5322                         /* Last path. Mark client dip as retiring */
5323                         i_mdi_client_unlock(ct);
5324                         MDI_PHCI_UNLOCK(ph);
5325                         (void) e_ddi_mark_retiring(cdip, cons_array);
5326                         MDI_PHCI_LOCK(ph);
5327                         pip = next;
5328                 } else {
5329                         i_mdi_client_unlock(ct);
5330                         pip = next;
5331                 }
5332         }
5333 
5334         MDI_PHCI_UNLOCK(ph);
5335 
5336         return;
5337 }
5338 
5339 void
5340 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5341 {
5342         mdi_phci_t      *ph;
5343         mdi_client_t    *ct;
5344         mdi_pathinfo_t  *pip;
5345         mdi_pathinfo_t  *next;
5346         dev_info_t      *cdip;
5347 
5348         if (!MDI_PHCI(dip))
5349                 return;
5350 
5351         ph = i_devi_get_phci(dip);
5352         if (ph == NULL)
5353                 return;
5354 
5355         MDI_PHCI_LOCK(ph);
5356 
5357         if (MDI_PHCI_IS_OFFLINE(ph)) {
5358                 MDI_PHCI_UNLOCK(ph);
5359                 /* not last path */
5360                 return;
5361         }
5362 
5363         if (ph->ph_unstable) {
5364                 MDI_PHCI_UNLOCK(ph);
5365                 /* can't check for constraints */
5366                 *constraint = 0;
5367                 return;
5368         }
5369 
5370         pip = ph->ph_path_head;
5371         while (pip != NULL) {
5372                 MDI_PI_LOCK(pip);
5373                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5374 
5375                 /*
5376                  * The mdi_pathinfo state is OK. Check the client state.
5377                  * If failover in progress fail the pHCI from offlining
5378                  */
5379                 ct = MDI_PI(pip)->pi_client;
5380                 i_mdi_client_lock(ct, pip);
5381                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5382                     (ct->ct_unstable)) {
5383                         /*
5384                          * Failover is in progress, can't check for constraints
5385                          */
5386                         MDI_PI_UNLOCK(pip);
5387                         i_mdi_client_unlock(ct);
5388                         MDI_PHCI_UNLOCK(ph);
5389                         *constraint = 0;
5390                         return;
5391                 }
5392                 MDI_PI_UNLOCK(pip);
5393 
5394                 /*
5395                  * Check to see of we are retiring the last path of this
5396                  * client device...
5397                  */
5398                 cdip = ct->ct_dip;
5399                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5400                     (i_mdi_client_compute_state(ct, ph) ==
5401                     MDI_CLIENT_STATE_FAILED)) {
5402                         i_mdi_client_unlock(ct);
5403                         MDI_PHCI_UNLOCK(ph);
5404                         (void) e_ddi_retire_notify(cdip, constraint);
5405                         MDI_PHCI_LOCK(ph);
5406                         pip = next;
5407                 } else {
5408                         i_mdi_client_unlock(ct);
5409                         pip = next;
5410                 }
5411         }
5412 
5413         MDI_PHCI_UNLOCK(ph);
5414 
5415         return;
5416 }
5417 
5418 /*
5419  * offline the path(s) hanging off the pHCI. If the
5420  * last path to any client, check that constraints
5421  * have been applied.
5422  *
5423  * If constraint is 0, we aren't going to retire the
5424  * pHCI. However we still need to go through the paths
5425  * calling e_ddi_retire_finalize() to clear their
5426  * contract barriers.
5427  */
5428 void
5429 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5430 {
5431         mdi_phci_t      *ph;
5432         mdi_client_t    *ct;
5433         mdi_pathinfo_t  *pip;
5434         mdi_pathinfo_t  *next;
5435         dev_info_t      *cdip;
5436         int             unstable = 0;
5437         int             tmp_constraint;
5438 
5439         if (!MDI_PHCI(dip))
5440                 return;
5441 
5442         ph = i_devi_get_phci(dip);
5443         if (ph == NULL) {
5444                 /* no last path and no pips */
5445                 return;
5446         }
5447 
5448         MDI_PHCI_LOCK(ph);
5449 
5450         if (MDI_PHCI_IS_OFFLINE(ph)) {
5451                 MDI_PHCI_UNLOCK(ph);
5452                 /* no last path and no pips */
5453                 return;
5454         }
5455 
5456         /*
5457          * Check to see if the pHCI can be offlined
5458          */
5459         if (ph->ph_unstable) {
5460                 unstable = 1;
5461         }
5462 
5463         pip = ph->ph_path_head;
5464         while (pip != NULL) {
5465                 MDI_PI_LOCK(pip);
5466                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5467 
5468                 /*
5469                  * if failover in progress fail the pHCI from offlining
5470                  */
5471                 ct = MDI_PI(pip)->pi_client;
5472                 i_mdi_client_lock(ct, pip);
5473                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5474                     (ct->ct_unstable)) {
5475                         unstable = 1;
5476                 }
5477                 MDI_PI_UNLOCK(pip);
5478 
5479                 /*
5480                  * Check to see of we are removing the last path of this
5481                  * client device...
5482                  */
5483                 cdip = ct->ct_dip;
5484                 if (!phci_only && cdip &&
5485                     (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5486                     (i_mdi_client_compute_state(ct, ph) ==
5487                     MDI_CLIENT_STATE_FAILED)) {
5488                         i_mdi_client_unlock(ct);
5489                         MDI_PHCI_UNLOCK(ph);
5490                         /*
5491                          * This is the last path to this client.
5492                          *
5493                          * Constraint will only be set to 1 if this client can
5494                          * be retired (as already determined by
5495                          * mdi_phci_retire_notify). However we don't actually
5496                          * need to retire the client (we just retire the last
5497                          * path - MPXIO will then fail all I/Os to the client).
5498                          * But we still need to call e_ddi_retire_finalize so
5499                          * the contract barriers can be cleared. Therefore we
5500                          * temporarily set constraint = 0 so that the client
5501                          * dip is not retired.
5502                          */
5503                         tmp_constraint = 0;
5504                         (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5505                         MDI_PHCI_LOCK(ph);
5506                         pip = next;
5507                 } else {
5508                         i_mdi_client_unlock(ct);
5509                         pip = next;
5510                 }
5511         }
5512 
5513         if (!phci_only && *((int *)constraint) == 0) {
5514                 MDI_PHCI_UNLOCK(ph);
5515                 return;
5516         }
5517 
5518         /*
5519          * Cannot offline pip(s)
5520          */
5521         if (unstable) {
5522                 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5523                     "pHCI in transient state, cannot retire",
5524                     ddi_driver_name(dip), ddi_get_instance(dip));
5525                 MDI_PHCI_UNLOCK(ph);
5526                 return;
5527         }
5528 
5529         /*
5530          * Mark the pHCI as offline
5531          */
5532         MDI_PHCI_SET_OFFLINE(ph);
5533 
5534         /*
5535          * Mark the child mdi_pathinfo nodes as transient
5536          */
5537         pip = ph->ph_path_head;
5538         while (pip != NULL) {
5539                 MDI_PI_LOCK(pip);
5540                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5541                 MDI_PI_SET_OFFLINING(pip);
5542                 MDI_PI_UNLOCK(pip);
5543                 pip = next;
5544         }
5545         MDI_PHCI_UNLOCK(ph);
5546         /*
5547          * Give a chance for any pending commands to execute
5548          */
5549         delay_random(mdi_delay);
5550         MDI_PHCI_LOCK(ph);
5551         pip = ph->ph_path_head;
5552         while (pip != NULL) {
5553                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5554                 (void) i_mdi_pi_offline(pip, 0);
5555                 MDI_PI_LOCK(pip);
5556                 ct = MDI_PI(pip)->pi_client;
5557                 if (!MDI_PI_IS_OFFLINE(pip)) {
5558                         cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5559                             "path %d %s busy, cannot offline",
5560                             mdi_pi_get_path_instance(pip),
5561                             mdi_pi_spathname(pip));
5562                         MDI_PI_UNLOCK(pip);
5563                         MDI_PHCI_SET_ONLINE(ph);
5564                         MDI_PHCI_UNLOCK(ph);
5565                         return;
5566                 }
5567                 MDI_PI_UNLOCK(pip);
5568                 pip = next;
5569         }
5570         MDI_PHCI_UNLOCK(ph);
5571 
5572         return;
5573 }
5574 
5575 void
5576 mdi_phci_unretire(dev_info_t *dip)
5577 {
5578         mdi_phci_t      *ph;
5579         mdi_pathinfo_t  *pip;
5580         mdi_pathinfo_t  *next;
5581 
5582         ASSERT(MDI_PHCI(dip));
5583 
5584         /*
5585          * Online the phci
5586          */
5587         i_mdi_phci_online(dip);
5588 
5589         ph = i_devi_get_phci(dip);
5590         MDI_PHCI_LOCK(ph);
5591         pip = ph->ph_path_head;
5592         while (pip != NULL) {
5593                 MDI_PI_LOCK(pip);
5594                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5595                 MDI_PI_UNLOCK(pip);
5596                 (void) i_mdi_pi_online(pip, 0);
5597                 pip = next;
5598         }
5599         MDI_PHCI_UNLOCK(ph);
5600 }
5601 
5602 /*ARGSUSED*/
5603 static int
5604 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5605 {
5606         int             rv = NDI_SUCCESS;
5607         mdi_client_t    *ct;
5608 
5609         /*
5610          * Client component to go offline.  Make sure that we are
5611          * not in failing over state and update client state
5612          * accordingly
5613          */
5614         ct = i_devi_get_client(dip);
5615         MDI_DEBUG(2, (MDI_NOTE, dip,
5616             "called %p %p", (void *)dip, (void *)ct));
5617         if (ct != NULL) {
5618                 MDI_CLIENT_LOCK(ct);
5619                 if (ct->ct_unstable) {
5620                         /*
5621                          * One or more paths are in transient state,
5622                          * Dont allow offline of a client device
5623                          */
5624                         MDI_DEBUG(1, (MDI_WARN, dip,
5625                             "!One or more paths to "
5626                             "this device are in transient state. "
5627                             "This device can not be removed at this moment. "
5628                             "Please try again later."));
5629                         MDI_CLIENT_UNLOCK(ct);
5630                         return (NDI_BUSY);
5631                 }
5632                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5633                         /*
5634                          * Failover is in progress, Dont allow DR of
5635                          * a client device
5636                          */
5637                         MDI_DEBUG(1, (MDI_WARN, dip,
5638                             "!Client device is Busy. "
5639                             "This device can not be removed at this moment. "
5640                             "Please try again later."));
5641                         MDI_CLIENT_UNLOCK(ct);
5642                         return (NDI_BUSY);
5643                 }
5644                 MDI_CLIENT_SET_OFFLINE(ct);
5645 
5646                 /*
5647                  * Unbind our relationship with the dev_info node
5648                  */
5649                 if (flags & NDI_DEVI_REMOVE) {
5650                         ct->ct_dip = NULL;
5651                 }
5652                 MDI_CLIENT_UNLOCK(ct);
5653         }
5654         return (rv);
5655 }
5656 
5657 /*
5658  * mdi_pre_attach():
5659  *              Pre attach() notification handler
5660  */
5661 /*ARGSUSED*/
5662 int
5663 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5664 {
5665         /* don't support old DDI_PM_RESUME */
5666         if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5667             (cmd == DDI_PM_RESUME))
5668                 return (DDI_FAILURE);
5669 
5670         return (DDI_SUCCESS);
5671 }
5672 
5673 /*
5674  * mdi_post_attach():
5675  *              Post attach() notification handler
5676  */
5677 /*ARGSUSED*/
5678 void
5679 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5680 {
5681         mdi_phci_t      *ph;
5682         mdi_client_t    *ct;
5683         mdi_vhci_t      *vh;
5684 
5685         if (MDI_PHCI(dip)) {
5686                 ph = i_devi_get_phci(dip);
5687                 ASSERT(ph != NULL);
5688 
5689                 MDI_PHCI_LOCK(ph);
5690                 switch (cmd) {
5691                 case DDI_ATTACH:
5692                         MDI_DEBUG(2, (MDI_NOTE, dip,
5693                             "phci post_attach called %p", (void *)ph));
5694                         if (error == DDI_SUCCESS) {
5695                                 MDI_PHCI_SET_ATTACH(ph);
5696                         } else {
5697                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5698                                     "!pHCI post_attach failed: error %d",
5699                                     error));
5700                                 MDI_PHCI_SET_DETACH(ph);
5701                         }
5702                         break;
5703 
5704                 case DDI_RESUME:
5705                         MDI_DEBUG(2, (MDI_NOTE, dip,
5706                             "pHCI post_resume: called %p", (void *)ph));
5707                         if (error == DDI_SUCCESS) {
5708                                 MDI_PHCI_SET_RESUME(ph);
5709                         } else {
5710                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5711                                     "!pHCI post_resume failed: error %d",
5712                                     error));
5713                                 MDI_PHCI_SET_SUSPEND(ph);
5714                         }
5715                         break;
5716                 }
5717                 MDI_PHCI_UNLOCK(ph);
5718         }
5719 
5720         if (MDI_CLIENT(dip)) {
5721                 ct = i_devi_get_client(dip);
5722                 ASSERT(ct != NULL);
5723 
5724                 MDI_CLIENT_LOCK(ct);
5725                 switch (cmd) {
5726                 case DDI_ATTACH:
5727                         MDI_DEBUG(2, (MDI_NOTE, dip,
5728                             "client post_attach called %p", (void *)ct));
5729                         if (error != DDI_SUCCESS) {
5730                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5731                                     "!client post_attach failed: error %d",
5732                                     error));
5733                                 MDI_CLIENT_SET_DETACH(ct);
5734                                 MDI_DEBUG(4, (MDI_WARN, dip,
5735                                     "i_mdi_pm_reset_client"));
5736                                 i_mdi_pm_reset_client(ct);
5737                                 break;
5738                         }
5739 
5740                         /*
5741                          * Client device has successfully attached, inform
5742                          * the vhci.
5743                          */
5744                         vh = ct->ct_vhci;
5745                         if (vh->vh_ops->vo_client_attached)
5746                                 (*vh->vh_ops->vo_client_attached)(dip);
5747 
5748                         MDI_CLIENT_SET_ATTACH(ct);
5749                         break;
5750 
5751                 case DDI_RESUME:
5752                         MDI_DEBUG(2, (MDI_NOTE, dip,
5753                             "client post_attach: called %p", (void *)ct));
5754                         if (error == DDI_SUCCESS) {
5755                                 MDI_CLIENT_SET_RESUME(ct);
5756                         } else {
5757                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5758                                     "!client post_resume failed: error %d",
5759                                     error));
5760                                 MDI_CLIENT_SET_SUSPEND(ct);
5761                         }
5762                         break;
5763                 }
5764                 MDI_CLIENT_UNLOCK(ct);
5765         }
5766 }
5767 
5768 /*
5769  * mdi_pre_detach():
5770  *              Pre detach notification handler
5771  */
5772 /*ARGSUSED*/
5773 int
5774 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5775 {
5776         int rv = DDI_SUCCESS;
5777 
5778         if (MDI_CLIENT(dip)) {
5779                 (void) i_mdi_client_pre_detach(dip, cmd);
5780         }
5781 
5782         if (MDI_PHCI(dip)) {
5783                 rv = i_mdi_phci_pre_detach(dip, cmd);
5784         }
5785 
5786         return (rv);
5787 }
5788 
5789 /*ARGSUSED*/
5790 static int
5791 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5792 {
5793         int             rv = DDI_SUCCESS;
5794         mdi_phci_t      *ph;
5795         mdi_client_t    *ct;
5796         mdi_pathinfo_t  *pip;
5797         mdi_pathinfo_t  *failed_pip = NULL;
5798         mdi_pathinfo_t  *next;
5799 
5800         ph = i_devi_get_phci(dip);
5801         if (ph == NULL) {
5802                 return (rv);
5803         }
5804 
5805         MDI_PHCI_LOCK(ph);
5806         switch (cmd) {
5807         case DDI_DETACH:
5808                 MDI_DEBUG(2, (MDI_NOTE, dip,
5809                     "pHCI pre_detach: called %p", (void *)ph));
5810                 if (!MDI_PHCI_IS_OFFLINE(ph)) {
5811                         /*
5812                          * mdi_pathinfo nodes are still attached to
5813                          * this pHCI. Fail the detach for this pHCI.
5814                          */
5815                         MDI_DEBUG(2, (MDI_WARN, dip,
5816                             "pHCI pre_detach: paths are still attached %p",
5817                             (void *)ph));
5818                         rv = DDI_FAILURE;
5819                         break;
5820                 }
5821                 MDI_PHCI_SET_DETACH(ph);
5822                 break;
5823 
5824         case DDI_SUSPEND:
5825                 /*
5826                  * pHCI is getting suspended.  Since mpxio client
5827                  * devices may not be suspended at this point, to avoid
5828                  * a potential stack overflow, it is important to suspend
5829                  * client devices before pHCI can be suspended.
5830                  */
5831 
5832                 MDI_DEBUG(2, (MDI_NOTE, dip,
5833                     "pHCI pre_suspend: called %p", (void *)ph));
5834                 /*
5835                  * Suspend all the client devices accessible through this pHCI
5836                  */
5837                 pip = ph->ph_path_head;
5838                 while (pip != NULL && rv == DDI_SUCCESS) {
5839                         dev_info_t *cdip;
5840                         MDI_PI_LOCK(pip);
5841                         next =
5842                             (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5843                         ct = MDI_PI(pip)->pi_client;
5844                         i_mdi_client_lock(ct, pip);
5845                         cdip = ct->ct_dip;
5846                         MDI_PI_UNLOCK(pip);
5847                         if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5848                             MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5849                                 i_mdi_client_unlock(ct);
5850                                 if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5851                                     DDI_SUCCESS) {
5852                                         /*
5853                                          * Suspend of one of the client
5854                                          * device has failed.
5855                                          */
5856                                         MDI_DEBUG(1, (MDI_WARN, dip,
5857                                             "!suspend of device (%s%d) failed.",
5858                                             ddi_driver_name(cdip),
5859                                             ddi_get_instance(cdip)));
5860                                         failed_pip = pip;
5861                                         break;
5862                                 }
5863                         } else {
5864                                 i_mdi_client_unlock(ct);
5865                         }
5866                         pip = next;
5867                 }
5868 
5869                 if (rv == DDI_SUCCESS) {
5870                         /*
5871                          * Suspend of client devices is complete. Proceed
5872                          * with pHCI suspend.
5873                          */
5874                         MDI_PHCI_SET_SUSPEND(ph);
5875                 } else {
5876                         /*
5877                          * Revert back all the suspended client device states
5878                          * to converse.
5879                          */
5880                         pip = ph->ph_path_head;
5881                         while (pip != failed_pip) {
5882                                 dev_info_t *cdip;
5883                                 MDI_PI_LOCK(pip);
5884                                 next =
5885                                     (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5886                                 ct = MDI_PI(pip)->pi_client;
5887                                 i_mdi_client_lock(ct, pip);
5888                                 cdip = ct->ct_dip;
5889                                 MDI_PI_UNLOCK(pip);
5890                                 if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5891                                         i_mdi_client_unlock(ct);
5892                                         (void) devi_attach(cdip, DDI_RESUME);
5893                                 } else {
5894                                         i_mdi_client_unlock(ct);
5895                                 }
5896                                 pip = next;
5897                         }
5898                 }
5899                 break;
5900 
5901         default:
5902                 rv = DDI_FAILURE;
5903                 break;
5904         }
5905         MDI_PHCI_UNLOCK(ph);
5906         return (rv);
5907 }
5908 
5909 /*ARGSUSED*/
5910 static int
5911 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5912 {
5913         int             rv = DDI_SUCCESS;
5914         mdi_client_t    *ct;
5915 
5916         ct = i_devi_get_client(dip);
5917         if (ct == NULL) {
5918                 return (rv);
5919         }
5920 
5921         MDI_CLIENT_LOCK(ct);
5922         switch (cmd) {
5923         case DDI_DETACH:
5924                 MDI_DEBUG(2, (MDI_NOTE, dip,
5925                     "client pre_detach: called %p",
5926                      (void *)ct));
5927                 MDI_CLIENT_SET_DETACH(ct);
5928                 break;
5929 
5930         case DDI_SUSPEND:
5931                 MDI_DEBUG(2, (MDI_NOTE, dip,
5932                     "client pre_suspend: called %p",
5933                     (void *)ct));
5934                 MDI_CLIENT_SET_SUSPEND(ct);
5935                 break;
5936 
5937         default:
5938                 rv = DDI_FAILURE;
5939                 break;
5940         }
5941         MDI_CLIENT_UNLOCK(ct);
5942         return (rv);
5943 }
5944 
5945 /*
5946  * mdi_post_detach():
5947  *              Post detach notification handler
5948  */
5949 /*ARGSUSED*/
5950 void
5951 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5952 {
5953         /*
5954          * Detach/Suspend of mpxio component failed. Update our state
5955          * too
5956          */
5957         if (MDI_PHCI(dip))
5958                 i_mdi_phci_post_detach(dip, cmd, error);
5959 
5960         if (MDI_CLIENT(dip))
5961                 i_mdi_client_post_detach(dip, cmd, error);
5962 }
5963 
5964 /*ARGSUSED*/
5965 static void
5966 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5967 {
5968         mdi_phci_t      *ph;
5969 
5970         /*
5971          * Detach/Suspend of phci component failed. Update our state
5972          * too
5973          */
5974         ph = i_devi_get_phci(dip);
5975         if (ph == NULL) {
5976                 return;
5977         }
5978 
5979         MDI_PHCI_LOCK(ph);
5980         /*
5981          * Detach of pHCI failed. Restore back converse
5982          * state
5983          */
5984         switch (cmd) {
5985         case DDI_DETACH:
5986                 MDI_DEBUG(2, (MDI_NOTE, dip,
5987                     "pHCI post_detach: called %p",
5988                     (void *)ph));
5989                 if (error != DDI_SUCCESS)
5990                         MDI_PHCI_SET_ATTACH(ph);
5991                 break;
5992 
5993         case DDI_SUSPEND:
5994                 MDI_DEBUG(2, (MDI_NOTE, dip,
5995                     "pHCI post_suspend: called %p",
5996                     (void *)ph));
5997                 if (error != DDI_SUCCESS)
5998                         MDI_PHCI_SET_RESUME(ph);
5999                 break;
6000         }
6001         MDI_PHCI_UNLOCK(ph);
6002 }
6003 
6004 /*ARGSUSED*/
6005 static void
6006 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6007 {
6008         mdi_client_t    *ct;
6009 
6010         ct = i_devi_get_client(dip);
6011         if (ct == NULL) {
6012                 return;
6013         }
6014         MDI_CLIENT_LOCK(ct);
6015         /*
6016          * Detach of Client failed. Restore back converse
6017          * state
6018          */
6019         switch (cmd) {
6020         case DDI_DETACH:
6021                 MDI_DEBUG(2, (MDI_NOTE, dip,
6022                     "client post_detach: called %p", (void *)ct));
6023                 if (DEVI_IS_ATTACHING(dip)) {
6024                         MDI_DEBUG(4, (MDI_NOTE, dip,
6025                             "i_mdi_pm_rele_client\n"));
6026                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6027                 } else {
6028                         MDI_DEBUG(4, (MDI_NOTE, dip,
6029                             "i_mdi_pm_reset_client\n"));
6030                         i_mdi_pm_reset_client(ct);
6031                 }
6032                 if (error != DDI_SUCCESS)
6033                         MDI_CLIENT_SET_ATTACH(ct);
6034                 break;
6035 
6036         case DDI_SUSPEND:
6037                 MDI_DEBUG(2, (MDI_NOTE, dip,
6038                     "called %p", (void *)ct));
6039                 if (error != DDI_SUCCESS)
6040                         MDI_CLIENT_SET_RESUME(ct);
6041                 break;
6042         }
6043         MDI_CLIENT_UNLOCK(ct);
6044 }
6045 
6046 int
6047 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6048 {
6049         return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6050 }
6051 
6052 /*
6053  * create and install per-path (client - pHCI) statistics
6054  * I/O stats supported: nread, nwritten, reads, and writes
6055  * Error stats - hard errors, soft errors, & transport errors
6056  */
6057 int
6058 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6059 {
6060         kstat_t                 *kiosp, *kerrsp;
6061         struct pi_errs          *nsp;
6062         struct mdi_pi_kstats    *mdi_statp;
6063         char                    *errksname;
6064         size_t                  len;
6065 
6066         /*
6067          * If the kstat name was already created nothing to do.
6068          */
6069         if ((kiosp = kstat_hold_byname("mdi", 0, ksname,
6070             ALL_ZONES)) != NULL) {
6071                 kstat_rele(kiosp);
6072                 return (MDI_SUCCESS);
6073         }
6074 
6075         if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6076             KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6077                 return (MDI_FAILURE);
6078         }
6079 
6080         len = strlen(ksname) + strlen(",err") + 1;
6081         errksname = kmem_alloc(len, KM_SLEEP);
6082         (void) snprintf(errksname, len, "%s,err", ksname);
6083 
6084         kerrsp = kstat_create("mdi", 0, errksname, "iopath_errors",
6085             KSTAT_TYPE_NAMED,
6086             sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6087         if (kerrsp == NULL) {
6088                 kstat_delete(kiosp);
6089                 kmem_free(errksname, len);
6090                 return (MDI_FAILURE);
6091         }
6092 
6093         nsp = (struct pi_errs *)kerrsp->ks_data;
6094         kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6095         kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6096         kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6097             KSTAT_DATA_UINT32);
6098         kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6099             KSTAT_DATA_UINT32);
6100         kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6101             KSTAT_DATA_UINT32);
6102         kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6103             KSTAT_DATA_UINT32);
6104         kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6105             KSTAT_DATA_UINT32);
6106         kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6107             KSTAT_DATA_UINT32);
6108         kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6109             KSTAT_DATA_UINT32);
6110         kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6111 
6112         mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6113         mdi_statp->pi_kstat_ref = 1;
6114         mdi_statp->pi_kstat_iostats = kiosp;
6115         mdi_statp->pi_kstat_errstats = kerrsp;
6116         kstat_install(kiosp);
6117         kstat_install(kerrsp);
6118         MDI_PI(pip)->pi_kstats = mdi_statp;
6119         kmem_free(errksname, len);
6120         return (MDI_SUCCESS);
6121 }
6122 
6123 /*
6124  * destroy per-path properties
6125  */
6126 static void
6127 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6128 {
6129 
6130         struct mdi_pi_kstats *mdi_statp;
6131 
6132         if (MDI_PI(pip)->pi_kstats == NULL)
6133                 return;
6134         if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6135                 return;
6136 
6137         MDI_PI(pip)->pi_kstats = NULL;
6138 
6139         /*
6140          * the kstat may be shared between multiple pathinfo nodes
6141          * decrement this pathinfo's usage, removing the kstats
6142          * themselves when the last pathinfo reference is removed.
6143          */
6144         ASSERT(mdi_statp->pi_kstat_ref > 0);
6145         if (--mdi_statp->pi_kstat_ref != 0)
6146                 return;
6147 
6148         kstat_delete(mdi_statp->pi_kstat_iostats);
6149         kstat_delete(mdi_statp->pi_kstat_errstats);
6150         kmem_free(mdi_statp, sizeof (*mdi_statp));
6151 }
6152 
6153 /*
6154  * update I/O paths KSTATS
6155  */
6156 void
6157 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6158 {
6159         kstat_t *iostatp;
6160         size_t xfer_cnt;
6161 
6162         ASSERT(pip != NULL);
6163 
6164         /*
6165          * I/O can be driven across a path prior to having path
6166          * statistics available, i.e. probe(9e).
6167          */
6168         if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6169                 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6170                 xfer_cnt = bp->b_bcount - bp->b_resid;
6171                 if (bp->b_flags & B_READ) {
6172                         KSTAT_IO_PTR(iostatp)->reads++;
6173                         KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6174                 } else {
6175                         KSTAT_IO_PTR(iostatp)->writes++;
6176                         KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6177                 }
6178         }
6179 }
6180 
6181 /*
6182  * Enable the path(specific client/target/initiator)
6183  * Enabling a path means that MPxIO may select the enabled path for routing
6184  * future I/O requests, subject to other path state constraints.
6185  */
6186 int
6187 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6188 {
6189         mdi_phci_t      *ph;
6190 
6191         ph = MDI_PI(pip)->pi_phci;
6192         if (ph == NULL) {
6193                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6194                     "!failed: path %s %p: NULL ph",
6195                     mdi_pi_spathname(pip), (void *)pip));
6196                 return (MDI_FAILURE);
6197         }
6198 
6199         (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6200                 MDI_ENABLE_OP);
6201         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6202             "!returning success pip = %p. ph = %p",
6203             (void *)pip, (void *)ph));
6204         return (MDI_SUCCESS);
6205 
6206 }
6207 
6208 /*
6209  * Disable the path (specific client/target/initiator)
6210  * Disabling a path means that MPxIO will not select the disabled path for
6211  * routing any new I/O requests.
6212  */
6213 int
6214 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6215 {
6216         mdi_phci_t      *ph;
6217 
6218         ph = MDI_PI(pip)->pi_phci;
6219         if (ph == NULL) {
6220                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6221                     "!failed: path %s %p: NULL ph",
6222                     mdi_pi_spathname(pip), (void *)pip));
6223                 return (MDI_FAILURE);
6224         }
6225 
6226         (void) i_mdi_enable_disable_path(pip,
6227             ph->ph_vhci, flags, MDI_DISABLE_OP);
6228         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6229             "!returning success pip = %p. ph = %p",
6230             (void *)pip, (void *)ph));
6231         return (MDI_SUCCESS);
6232 }
6233 
6234 /*
6235  * disable the path to a particular pHCI (pHCI specified in the phci_path
6236  * argument) for a particular client (specified in the client_path argument).
6237  * Disabling a path means that MPxIO will not select the disabled path for
6238  * routing any new I/O requests.
6239  * NOTE: this will be removed once the NWS files are changed to use the new
6240  * mdi_{enable,disable}_path interfaces
6241  */
6242 int
6243 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6244 {
6245         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6246 }
6247 
6248 /*
6249  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6250  * argument) for a particular client (specified in the client_path argument).
6251  * Enabling a path means that MPxIO may select the enabled path for routing
6252  * future I/O requests, subject to other path state constraints.
6253  * NOTE: this will be removed once the NWS files are changed to use the new
6254  * mdi_{enable,disable}_path interfaces
6255  */
6256 
6257 int
6258 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6259 {
6260         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6261 }
6262 
6263 /*
6264  * Common routine for doing enable/disable.
6265  */
6266 static mdi_pathinfo_t *
6267 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6268                 int op)
6269 {
6270         int             sync_flag = 0;
6271         int             rv;
6272         mdi_pathinfo_t  *next;
6273         int             (*f)() = NULL;
6274 
6275         /*
6276          * Check to make sure the path is not already in the
6277          * requested state. If it is just return the next path
6278          * as we have nothing to do here.
6279          */
6280         if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6281             (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6282                 MDI_PI_LOCK(pip);
6283                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6284                 MDI_PI_UNLOCK(pip);
6285                 return (next);
6286         }
6287 
6288         f = vh->vh_ops->vo_pi_state_change;
6289 
6290         sync_flag = (flags << 8) & 0xf00;
6291 
6292         /*
6293          * Do a callback into the mdi consumer to let it
6294          * know that path is about to get enabled/disabled.
6295          */
6296         if (f != NULL) {
6297                 rv = (*f)(vh->vh_dip, pip, 0,
6298                         MDI_PI_EXT_STATE(pip),
6299                         MDI_EXT_STATE_CHANGE | sync_flag |
6300                         op | MDI_BEFORE_STATE_CHANGE);
6301                 if (rv != MDI_SUCCESS) {
6302                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6303                             "vo_pi_state_change: failed rv = %x", rv));
6304                 }
6305         }
6306         MDI_PI_LOCK(pip);
6307         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6308 
6309         switch (flags) {
6310                 case USER_DISABLE:
6311                         if (op == MDI_DISABLE_OP) {
6312                                 MDI_PI_SET_USER_DISABLE(pip);
6313                         } else {
6314                                 MDI_PI_SET_USER_ENABLE(pip);
6315                         }
6316                         break;
6317                 case DRIVER_DISABLE:
6318                         if (op == MDI_DISABLE_OP) {
6319                                 MDI_PI_SET_DRV_DISABLE(pip);
6320                         } else {
6321                                 MDI_PI_SET_DRV_ENABLE(pip);
6322                         }
6323                         break;
6324                 case DRIVER_DISABLE_TRANSIENT:
6325                         if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6326                                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6327                         } else {
6328                                 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6329                         }
6330                         break;
6331         }
6332         MDI_PI_UNLOCK(pip);
6333         /*
6334          * Do a callback into the mdi consumer to let it
6335          * know that path is now enabled/disabled.
6336          */
6337         if (f != NULL) {
6338                 rv = (*f)(vh->vh_dip, pip, 0,
6339                         MDI_PI_EXT_STATE(pip),
6340                         MDI_EXT_STATE_CHANGE | sync_flag |
6341                         op | MDI_AFTER_STATE_CHANGE);
6342                 if (rv != MDI_SUCCESS) {
6343                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6344                             "vo_pi_state_change failed: rv = %x", rv));
6345                 }
6346         }
6347         return (next);
6348 }
6349 
6350 /*
6351  * Common routine for doing enable/disable.
6352  * NOTE: this will be removed once the NWS files are changed to use the new
6353  * mdi_{enable,disable}_path has been putback
6354  */
6355 int
6356 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6357 {
6358 
6359         mdi_phci_t      *ph;
6360         mdi_vhci_t      *vh = NULL;
6361         mdi_client_t    *ct;
6362         mdi_pathinfo_t  *next, *pip;
6363         int             found_it;
6364 
6365         ph = i_devi_get_phci(pdip);
6366         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6367             "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6368             (void *)cdip));
6369         if (ph == NULL) {
6370                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6371                     "!failed: operation %d: NULL ph", op));
6372                 return (MDI_FAILURE);
6373         }
6374 
6375         if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6376                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6377                     "!failed: invalid operation %d", op));
6378                 return (MDI_FAILURE);
6379         }
6380 
6381         vh = ph->ph_vhci;
6382 
6383         if (cdip == NULL) {
6384                 /*
6385                  * Need to mark the Phci as enabled/disabled.
6386                  */
6387                 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6388                     "op %d for the phci", op));
6389                 MDI_PHCI_LOCK(ph);
6390                 switch (flags) {
6391                         case USER_DISABLE:
6392                                 if (op == MDI_DISABLE_OP) {
6393                                         MDI_PHCI_SET_USER_DISABLE(ph);
6394                                 } else {
6395                                         MDI_PHCI_SET_USER_ENABLE(ph);
6396                                 }
6397                                 break;
6398                         case DRIVER_DISABLE:
6399                                 if (op == MDI_DISABLE_OP) {
6400                                         MDI_PHCI_SET_DRV_DISABLE(ph);
6401                                 } else {
6402                                         MDI_PHCI_SET_DRV_ENABLE(ph);
6403                                 }
6404                                 break;
6405                         case DRIVER_DISABLE_TRANSIENT:
6406                                 if (op == MDI_DISABLE_OP) {
6407                                         MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6408                                 } else {
6409                                         MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6410                                 }
6411                                 break;
6412                         default:
6413                                 MDI_PHCI_UNLOCK(ph);
6414                                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6415                                     "!invalid flag argument= %d", flags));
6416                 }
6417 
6418                 /*
6419                  * Phci has been disabled. Now try to enable/disable
6420                  * path info's to each client.
6421                  */
6422                 pip = ph->ph_path_head;
6423                 while (pip != NULL) {
6424                         pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6425                 }
6426                 MDI_PHCI_UNLOCK(ph);
6427         } else {
6428 
6429                 /*
6430                  * Disable a specific client.
6431                  */
6432                 ct = i_devi_get_client(cdip);
6433                 if (ct == NULL) {
6434                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6435                             "!failed: operation = %d: NULL ct", op));
6436                         return (MDI_FAILURE);
6437                 }
6438 
6439                 MDI_CLIENT_LOCK(ct);
6440                 pip = ct->ct_path_head;
6441                 found_it = 0;
6442                 while (pip != NULL) {
6443                         MDI_PI_LOCK(pip);
6444                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6445                         if (MDI_PI(pip)->pi_phci == ph) {
6446                                 MDI_PI_UNLOCK(pip);
6447                                 found_it = 1;
6448                                 break;
6449                         }
6450                         MDI_PI_UNLOCK(pip);
6451                         pip = next;
6452                 }
6453 
6454 
6455                 MDI_CLIENT_UNLOCK(ct);
6456                 if (found_it == 0) {
6457                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6458                             "!failed. Could not find corresponding pip\n"));
6459                         return (MDI_FAILURE);
6460                 }
6461 
6462                 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6463         }
6464 
6465         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6466             "!op %d returning success pdip = %p cdip = %p",
6467             op, (void *)pdip, (void *)cdip));
6468         return (MDI_SUCCESS);
6469 }
6470 
6471 /*
6472  * Ensure phci powered up
6473  */
6474 static void
6475 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6476 {
6477         dev_info_t      *ph_dip;
6478 
6479         ASSERT(pip != NULL);
6480         ASSERT(MDI_PI_LOCKED(pip));
6481 
6482         if (MDI_PI(pip)->pi_pm_held) {
6483                 return;
6484         }
6485 
6486         ph_dip = mdi_pi_get_phci(pip);
6487         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6488             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6489         if (ph_dip == NULL) {
6490                 return;
6491         }
6492 
6493         MDI_PI_UNLOCK(pip);
6494         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6495             DEVI(ph_dip)->devi_pm_kidsupcnt));
6496         pm_hold_power(ph_dip);
6497         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6498             DEVI(ph_dip)->devi_pm_kidsupcnt));
6499         MDI_PI_LOCK(pip);
6500 
6501         /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6502         if (DEVI(ph_dip)->devi_pm_info)
6503                 MDI_PI(pip)->pi_pm_held = 1;
6504 }
6505 
6506 /*
6507  * Allow phci powered down
6508  */
6509 static void
6510 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6511 {
6512         dev_info_t      *ph_dip = NULL;
6513 
6514         ASSERT(pip != NULL);
6515         ASSERT(MDI_PI_LOCKED(pip));
6516 
6517         if (MDI_PI(pip)->pi_pm_held == 0) {
6518                 return;
6519         }
6520 
6521         ph_dip = mdi_pi_get_phci(pip);
6522         ASSERT(ph_dip != NULL);
6523 
6524         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6525             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6526 
6527         MDI_PI_UNLOCK(pip);
6528         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6529             "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6530         pm_rele_power(ph_dip);
6531         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6532             "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6533         MDI_PI_LOCK(pip);
6534 
6535         MDI_PI(pip)->pi_pm_held = 0;
6536 }
6537 
6538 static void
6539 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6540 {
6541         ASSERT(MDI_CLIENT_LOCKED(ct));
6542 
6543         ct->ct_power_cnt += incr;
6544         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6545             "%p ct_power_cnt = %d incr = %d",
6546             (void *)ct, ct->ct_power_cnt, incr));
6547         ASSERT(ct->ct_power_cnt >= 0);
6548 }
6549 
6550 static void
6551 i_mdi_rele_all_phci(mdi_client_t *ct)
6552 {
6553         mdi_pathinfo_t  *pip;
6554 
6555         ASSERT(MDI_CLIENT_LOCKED(ct));
6556         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6557         while (pip != NULL) {
6558                 mdi_hold_path(pip);
6559                 MDI_PI_LOCK(pip);
6560                 i_mdi_pm_rele_pip(pip);
6561                 MDI_PI_UNLOCK(pip);
6562                 mdi_rele_path(pip);
6563                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6564         }
6565 }
6566 
6567 static void
6568 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6569 {
6570         ASSERT(MDI_CLIENT_LOCKED(ct));
6571 
6572         if (i_ddi_devi_attached(ct->ct_dip)) {
6573                 ct->ct_power_cnt -= decr;
6574                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6575                     "%p ct_power_cnt = %d decr = %d",
6576                     (void *)ct, ct->ct_power_cnt, decr));
6577         }
6578 
6579         ASSERT(ct->ct_power_cnt >= 0);
6580         if (ct->ct_power_cnt == 0) {
6581                 i_mdi_rele_all_phci(ct);
6582                 return;
6583         }
6584 }
6585 
6586 static void
6587 i_mdi_pm_reset_client(mdi_client_t *ct)
6588 {
6589         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6590             "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6591         ASSERT(MDI_CLIENT_LOCKED(ct));
6592         ct->ct_power_cnt = 0;
6593         i_mdi_rele_all_phci(ct);
6594         ct->ct_powercnt_config = 0;
6595         ct->ct_powercnt_unconfig = 0;
6596         ct->ct_powercnt_reset = 1;
6597 }
6598 
6599 static int
6600 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6601 {
6602         int             ret;
6603         dev_info_t      *ph_dip;
6604 
6605         MDI_PI_LOCK(pip);
6606         i_mdi_pm_hold_pip(pip);
6607 
6608         ph_dip = mdi_pi_get_phci(pip);
6609         MDI_PI_UNLOCK(pip);
6610 
6611         /* bring all components of phci to full power */
6612         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6613             "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6614             ddi_get_instance(ph_dip), (void *)pip));
6615 
6616         ret = pm_powerup(ph_dip);
6617 
6618         if (ret == DDI_FAILURE) {
6619                 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6620                     "pm_powerup FAILED for %s%d %p",
6621                     ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6622                     (void *)pip));
6623 
6624                 MDI_PI_LOCK(pip);
6625                 i_mdi_pm_rele_pip(pip);
6626                 MDI_PI_UNLOCK(pip);
6627                 return (MDI_FAILURE);
6628         }
6629 
6630         return (MDI_SUCCESS);
6631 }
6632 
6633 static int
6634 i_mdi_power_all_phci(mdi_client_t *ct)
6635 {
6636         mdi_pathinfo_t  *pip;
6637         int             succeeded = 0;
6638 
6639         ASSERT(MDI_CLIENT_LOCKED(ct));
6640         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6641         while (pip != NULL) {
6642                 /*
6643                  * Don't power if MDI_PATHINFO_STATE_FAULT
6644                  * or MDI_PATHINFO_STATE_OFFLINE.
6645                  */
6646                 if (MDI_PI_IS_INIT(pip) ||
6647                     MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6648                         mdi_hold_path(pip);
6649                         MDI_CLIENT_UNLOCK(ct);
6650                         if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6651                                 succeeded = 1;
6652 
6653                         ASSERT(ct == MDI_PI(pip)->pi_client);
6654                         MDI_CLIENT_LOCK(ct);
6655                         mdi_rele_path(pip);
6656                 }
6657                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6658         }
6659 
6660         return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6661 }
6662 
6663 /*
6664  * mdi_bus_power():
6665  *              1. Place the phci(s) into powered up state so that
6666  *                 client can do power management
6667  *              2. Ensure phci powered up as client power managing
6668  * Return Values:
6669  *              MDI_SUCCESS
6670  *              MDI_FAILURE
6671  */
6672 int
6673 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6674     void *arg, void *result)
6675 {
6676         int                     ret = MDI_SUCCESS;
6677         pm_bp_child_pwrchg_t    *bpc;
6678         mdi_client_t            *ct;
6679         dev_info_t              *cdip;
6680         pm_bp_has_changed_t     *bphc;
6681 
6682         /*
6683          * BUS_POWER_NOINVOL not supported
6684          */
6685         if (op == BUS_POWER_NOINVOL)
6686                 return (MDI_FAILURE);
6687 
6688         /*
6689          * ignore other OPs.
6690          * return quickly to save cou cycles on the ct processing
6691          */
6692         switch (op) {
6693         case BUS_POWER_PRE_NOTIFICATION:
6694         case BUS_POWER_POST_NOTIFICATION:
6695                 bpc = (pm_bp_child_pwrchg_t *)arg;
6696                 cdip = bpc->bpc_dip;
6697                 break;
6698         case BUS_POWER_HAS_CHANGED:
6699                 bphc = (pm_bp_has_changed_t *)arg;
6700                 cdip = bphc->bphc_dip;
6701                 break;
6702         default:
6703                 return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6704         }
6705 
6706         ASSERT(MDI_CLIENT(cdip));
6707 
6708         ct = i_devi_get_client(cdip);
6709         if (ct == NULL)
6710                 return (MDI_FAILURE);
6711 
6712         /*
6713          * wait till the mdi_pathinfo node state change are processed
6714          */
6715         MDI_CLIENT_LOCK(ct);
6716         switch (op) {
6717         case BUS_POWER_PRE_NOTIFICATION:
6718                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6719                     "BUS_POWER_PRE_NOTIFICATION:"
6720                     "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6721                     ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6722                     bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6723 
6724                 /* serialize power level change per client */
6725                 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6726                         cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6727 
6728                 MDI_CLIENT_SET_POWER_TRANSITION(ct);
6729 
6730                 if (ct->ct_power_cnt == 0) {
6731                         ret = i_mdi_power_all_phci(ct);
6732                 }
6733 
6734                 /*
6735                  * if new_level > 0:
6736                  *      - hold phci(s)
6737                  *      - power up phci(s) if not already
6738                  * ignore power down
6739                  */
6740                 if (bpc->bpc_nlevel > 0) {
6741                         if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6742                                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6743                                     "i_mdi_pm_hold_client\n"));
6744                                 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6745                         }
6746                 }
6747                 break;
6748         case BUS_POWER_POST_NOTIFICATION:
6749                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6750                     "BUS_POWER_POST_NOTIFICATION:"
6751                     "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6752                     ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6753                     bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6754                     *(int *)result));
6755 
6756                 if (*(int *)result == DDI_SUCCESS) {
6757                         if (bpc->bpc_nlevel > 0) {
6758                                 MDI_CLIENT_SET_POWER_UP(ct);
6759                         } else {
6760                                 MDI_CLIENT_SET_POWER_DOWN(ct);
6761                         }
6762                 }
6763 
6764                 /* release the hold we did in pre-notification */
6765                 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6766                     !DEVI_IS_ATTACHING(ct->ct_dip)) {
6767                         MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6768                             "i_mdi_pm_rele_client\n"));
6769                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6770                 }
6771 
6772                 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6773                         /* another thread might started attaching */
6774                         if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6775                                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6776                                     "i_mdi_pm_rele_client\n"));
6777                                 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6778                         /* detaching has been taken care in pm_post_unconfig */
6779                         } else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6780                                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6781                                     "i_mdi_pm_reset_client\n"));
6782                                 i_mdi_pm_reset_client(ct);
6783                         }
6784                 }
6785 
6786                 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6787                 cv_broadcast(&ct->ct_powerchange_cv);
6788 
6789                 break;
6790 
6791         /* need to do more */
6792         case BUS_POWER_HAS_CHANGED:
6793                 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6794                     "BUS_POWER_HAS_CHANGED:"
6795                     "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6796                     ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6797                     bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6798 
6799                 if (bphc->bphc_nlevel > 0 &&
6800                     bphc->bphc_nlevel > bphc->bphc_olevel) {
6801                         if (ct->ct_power_cnt == 0) {
6802                                 ret = i_mdi_power_all_phci(ct);
6803                         }
6804                         MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6805                             "i_mdi_pm_hold_client\n"));
6806                         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6807                 }
6808 
6809                 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6810                         MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6811                             "i_mdi_pm_rele_client\n"));
6812                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6813                 }
6814                 break;
6815         }
6816 
6817         MDI_CLIENT_UNLOCK(ct);
6818         return (ret);
6819 }
6820 
6821 static int
6822 i_mdi_pm_pre_config_one(dev_info_t *child)
6823 {
6824         int             ret = MDI_SUCCESS;
6825         mdi_client_t    *ct;
6826 
6827         ct = i_devi_get_client(child);
6828         if (ct == NULL)
6829                 return (MDI_FAILURE);
6830 
6831         MDI_CLIENT_LOCK(ct);
6832         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6833                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6834 
6835         if (!MDI_CLIENT_IS_FAILED(ct)) {
6836                 MDI_CLIENT_UNLOCK(ct);
6837                 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6838                 return (MDI_SUCCESS);
6839         }
6840 
6841         if (ct->ct_powercnt_config) {
6842                 MDI_CLIENT_UNLOCK(ct);
6843                 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6844                 return (MDI_SUCCESS);
6845         }
6846 
6847         if (ct->ct_power_cnt == 0) {
6848                 ret = i_mdi_power_all_phci(ct);
6849         }
6850         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6851         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6852         ct->ct_powercnt_config = 1;
6853         ct->ct_powercnt_reset = 0;
6854         MDI_CLIENT_UNLOCK(ct);
6855         return (ret);
6856 }
6857 
6858 static int
6859 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6860 {
6861         int                     ret = MDI_SUCCESS;
6862         dev_info_t              *cdip;
6863         int                     circ;
6864 
6865         ASSERT(MDI_VHCI(vdip));
6866 
6867         /* ndi_devi_config_one */
6868         if (child) {
6869                 ASSERT(DEVI_BUSY_OWNED(vdip));
6870                 return (i_mdi_pm_pre_config_one(child));
6871         }
6872 
6873         /* devi_config_common */
6874         ndi_devi_enter(vdip, &circ);
6875         cdip = ddi_get_child(vdip);
6876         while (cdip) {
6877                 dev_info_t *next = ddi_get_next_sibling(cdip);
6878 
6879                 ret = i_mdi_pm_pre_config_one(cdip);
6880                 if (ret != MDI_SUCCESS)
6881                         break;
6882                 cdip = next;
6883         }
6884         ndi_devi_exit(vdip, circ);
6885         return (ret);
6886 }
6887 
6888 static int
6889 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6890 {
6891         int             ret = MDI_SUCCESS;
6892         mdi_client_t    *ct;
6893 
6894         ct = i_devi_get_client(child);
6895         if (ct == NULL)
6896                 return (MDI_FAILURE);
6897 
6898         MDI_CLIENT_LOCK(ct);
6899         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6900                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6901 
6902         if (!i_ddi_devi_attached(child)) {
6903                 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6904                 MDI_CLIENT_UNLOCK(ct);
6905                 return (MDI_SUCCESS);
6906         }
6907 
6908         if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6909             (flags & NDI_AUTODETACH)) {
6910                 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6911                 MDI_CLIENT_UNLOCK(ct);
6912                 return (MDI_FAILURE);
6913         }
6914 
6915         if (ct->ct_powercnt_unconfig) {
6916                 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6917                 MDI_CLIENT_UNLOCK(ct);
6918                 *held = 1;
6919                 return (MDI_SUCCESS);
6920         }
6921 
6922         if (ct->ct_power_cnt == 0) {
6923                 ret = i_mdi_power_all_phci(ct);
6924         }
6925         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6926         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6927         ct->ct_powercnt_unconfig = 1;
6928         ct->ct_powercnt_reset = 0;
6929         MDI_CLIENT_UNLOCK(ct);
6930         if (ret == MDI_SUCCESS)
6931                 *held = 1;
6932         return (ret);
6933 }
6934 
6935 static int
6936 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6937     int flags)
6938 {
6939         int                     ret = MDI_SUCCESS;
6940         dev_info_t              *cdip;
6941         int                     circ;
6942 
6943         ASSERT(MDI_VHCI(vdip));
6944         *held = 0;
6945 
6946         /* ndi_devi_unconfig_one */
6947         if (child) {
6948                 ASSERT(DEVI_BUSY_OWNED(vdip));
6949                 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6950         }
6951 
6952         /* devi_unconfig_common */
6953         ndi_devi_enter(vdip, &circ);
6954         cdip = ddi_get_child(vdip);
6955         while (cdip) {
6956                 dev_info_t *next = ddi_get_next_sibling(cdip);
6957 
6958                 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6959                 cdip = next;
6960         }
6961         ndi_devi_exit(vdip, circ);
6962 
6963         if (*held)
6964                 ret = MDI_SUCCESS;
6965 
6966         return (ret);
6967 }
6968 
6969 static void
6970 i_mdi_pm_post_config_one(dev_info_t *child)
6971 {
6972         mdi_client_t    *ct;
6973 
6974         ct = i_devi_get_client(child);
6975         if (ct == NULL)
6976                 return;
6977 
6978         MDI_CLIENT_LOCK(ct);
6979         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6980                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6981 
6982         if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6983                 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6984                 MDI_CLIENT_UNLOCK(ct);
6985                 return;
6986         }
6987 
6988         /* client has not been updated */
6989         if (MDI_CLIENT_IS_FAILED(ct)) {
6990                 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
6991                 MDI_CLIENT_UNLOCK(ct);
6992                 return;
6993         }
6994 
6995         /* another thread might have powered it down or detached it */
6996         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6997             !DEVI_IS_ATTACHING(child)) ||
6998             (!i_ddi_devi_attached(child) &&
6999             !DEVI_IS_ATTACHING(child))) {
7000                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7001                 i_mdi_pm_reset_client(ct);
7002         } else {
7003                 mdi_pathinfo_t  *pip, *next;
7004                 int     valid_path_count = 0;
7005 
7006                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7007                 pip = ct->ct_path_head;
7008                 while (pip != NULL) {
7009                         MDI_PI_LOCK(pip);
7010                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7011                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7012                                 valid_path_count ++;
7013                         MDI_PI_UNLOCK(pip);
7014                         pip = next;
7015                 }
7016                 i_mdi_pm_rele_client(ct, valid_path_count);
7017         }
7018         ct->ct_powercnt_config = 0;
7019         MDI_CLIENT_UNLOCK(ct);
7020 }
7021 
7022 static void
7023 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7024 {
7025         int             circ;
7026         dev_info_t      *cdip;
7027 
7028         ASSERT(MDI_VHCI(vdip));
7029 
7030         /* ndi_devi_config_one */
7031         if (child) {
7032                 ASSERT(DEVI_BUSY_OWNED(vdip));
7033                 i_mdi_pm_post_config_one(child);
7034                 return;
7035         }
7036 
7037         /* devi_config_common */
7038         ndi_devi_enter(vdip, &circ);
7039         cdip = ddi_get_child(vdip);
7040         while (cdip) {
7041                 dev_info_t *next = ddi_get_next_sibling(cdip);
7042 
7043                 i_mdi_pm_post_config_one(cdip);
7044                 cdip = next;
7045         }
7046         ndi_devi_exit(vdip, circ);
7047 }
7048 
7049 static void
7050 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7051 {
7052         mdi_client_t    *ct;
7053 
7054         ct = i_devi_get_client(child);
7055         if (ct == NULL)
7056                 return;
7057 
7058         MDI_CLIENT_LOCK(ct);
7059         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7060                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7061 
7062         if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7063                 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7064                 MDI_CLIENT_UNLOCK(ct);
7065                 return;
7066         }
7067 
7068         /* failure detaching or another thread just attached it */
7069         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7070             i_ddi_devi_attached(child)) ||
7071             (!i_ddi_devi_attached(child) &&
7072             !DEVI_IS_ATTACHING(child))) {
7073                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7074                 i_mdi_pm_reset_client(ct);
7075         } else {
7076                 mdi_pathinfo_t  *pip, *next;
7077                 int     valid_path_count = 0;
7078 
7079                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7080                 pip = ct->ct_path_head;
7081                 while (pip != NULL) {
7082                         MDI_PI_LOCK(pip);
7083                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7084                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7085                                 valid_path_count ++;
7086                         MDI_PI_UNLOCK(pip);
7087                         pip = next;
7088                 }
7089                 i_mdi_pm_rele_client(ct, valid_path_count);
7090                 ct->ct_powercnt_unconfig = 0;
7091         }
7092 
7093         MDI_CLIENT_UNLOCK(ct);
7094 }
7095 
7096 static void
7097 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7098 {
7099         int                     circ;
7100         dev_info_t              *cdip;
7101 
7102         ASSERT(MDI_VHCI(vdip));
7103 
7104         if (!held) {
7105                 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7106                 return;
7107         }
7108 
7109         if (child) {
7110                 ASSERT(DEVI_BUSY_OWNED(vdip));
7111                 i_mdi_pm_post_unconfig_one(child);
7112                 return;
7113         }
7114 
7115         ndi_devi_enter(vdip, &circ);
7116         cdip = ddi_get_child(vdip);
7117         while (cdip) {
7118                 dev_info_t *next = ddi_get_next_sibling(cdip);
7119 
7120                 i_mdi_pm_post_unconfig_one(cdip);
7121                 cdip = next;
7122         }
7123         ndi_devi_exit(vdip, circ);
7124 }
7125 
7126 int
7127 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7128 {
7129         int                     circ, ret = MDI_SUCCESS;
7130         dev_info_t              *client_dip = NULL;
7131         mdi_client_t            *ct;
7132 
7133         /*
7134          * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7135          * Power up pHCI for the named client device.
7136          * Note: Before the client is enumerated under vhci by phci,
7137          * client_dip can be NULL. Then proceed to power up all the
7138          * pHCIs.
7139          */
7140         if (devnm != NULL) {
7141                 ndi_devi_enter(vdip, &circ);
7142                 client_dip = ndi_devi_findchild(vdip, devnm);
7143         }
7144 
7145         MDI_DEBUG(4, (MDI_NOTE, vdip,
7146             "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7147 
7148         switch (op) {
7149         case MDI_PM_PRE_CONFIG:
7150                 ret = i_mdi_pm_pre_config(vdip, client_dip);
7151                 break;
7152 
7153         case MDI_PM_PRE_UNCONFIG:
7154                 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7155                     flags);
7156                 break;
7157 
7158         case MDI_PM_POST_CONFIG:
7159                 i_mdi_pm_post_config(vdip, client_dip);
7160                 break;
7161 
7162         case MDI_PM_POST_UNCONFIG:
7163                 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7164                 break;
7165 
7166         case MDI_PM_HOLD_POWER:
7167         case MDI_PM_RELE_POWER:
7168                 ASSERT(args);
7169 
7170                 client_dip = (dev_info_t *)args;
7171                 ASSERT(MDI_CLIENT(client_dip));
7172 
7173                 ct = i_devi_get_client(client_dip);
7174                 MDI_CLIENT_LOCK(ct);
7175 
7176                 if (op == MDI_PM_HOLD_POWER) {
7177                         if (ct->ct_power_cnt == 0) {
7178                                 (void) i_mdi_power_all_phci(ct);
7179                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7180                                     "i_mdi_pm_hold_client\n"));
7181                                 i_mdi_pm_hold_client(ct, ct->ct_path_count);
7182                         }
7183                 } else {
7184                         if (DEVI_IS_ATTACHING(client_dip)) {
7185                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7186                                     "i_mdi_pm_rele_client\n"));
7187                                 i_mdi_pm_rele_client(ct, ct->ct_path_count);
7188                         } else {
7189                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7190                                     "i_mdi_pm_reset_client\n"));
7191                                 i_mdi_pm_reset_client(ct);
7192                         }
7193                 }
7194 
7195                 MDI_CLIENT_UNLOCK(ct);
7196                 break;
7197 
7198         default:
7199                 break;
7200         }
7201 
7202         if (devnm)
7203                 ndi_devi_exit(vdip, circ);
7204 
7205         return (ret);
7206 }
7207 
7208 int
7209 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7210 {
7211         mdi_vhci_t *vhci;
7212 
7213         if (!MDI_VHCI(dip))
7214                 return (MDI_FAILURE);
7215 
7216         if (mdi_class) {
7217                 vhci = DEVI(dip)->devi_mdi_xhci;
7218                 ASSERT(vhci);
7219                 *mdi_class = vhci->vh_class;
7220         }
7221 
7222         return (MDI_SUCCESS);
7223 }
7224 
7225 int
7226 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7227 {
7228         mdi_phci_t *phci;
7229 
7230         if (!MDI_PHCI(dip))
7231                 return (MDI_FAILURE);
7232 
7233         if (mdi_class) {
7234                 phci = DEVI(dip)->devi_mdi_xhci;
7235                 ASSERT(phci);
7236                 *mdi_class = phci->ph_vhci->vh_class;
7237         }
7238 
7239         return (MDI_SUCCESS);
7240 }
7241 
7242 int
7243 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7244 {
7245         mdi_client_t *client;
7246 
7247         if (!MDI_CLIENT(dip))
7248                 return (MDI_FAILURE);
7249 
7250         if (mdi_class) {
7251                 client = DEVI(dip)->devi_mdi_client;
7252                 ASSERT(client);
7253                 *mdi_class = client->ct_vhci->vh_class;
7254         }
7255 
7256         return (MDI_SUCCESS);
7257 }
7258 
7259 void *
7260 mdi_client_get_vhci_private(dev_info_t *dip)
7261 {
7262         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7263         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7264                 mdi_client_t    *ct;
7265                 ct = i_devi_get_client(dip);
7266                 return (ct->ct_vprivate);
7267         }
7268         return (NULL);
7269 }
7270 
7271 void
7272 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7273 {
7274         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7275         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7276                 mdi_client_t    *ct;
7277                 ct = i_devi_get_client(dip);
7278                 ct->ct_vprivate = data;
7279         }
7280 }
7281 /*
7282  * mdi_pi_get_vhci_private():
7283  *              Get the vhci private information associated with the
7284  *              mdi_pathinfo node
7285  */
7286 void *
7287 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7288 {
7289         caddr_t vprivate = NULL;
7290         if (pip) {
7291                 vprivate = MDI_PI(pip)->pi_vprivate;
7292         }
7293         return (vprivate);
7294 }
7295 
7296 /*
7297  * mdi_pi_set_vhci_private():
7298  *              Set the vhci private information in the mdi_pathinfo node
7299  */
7300 void
7301 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7302 {
7303         if (pip) {
7304                 MDI_PI(pip)->pi_vprivate = priv;
7305         }
7306 }
7307 
7308 /*
7309  * mdi_phci_get_vhci_private():
7310  *              Get the vhci private information associated with the
7311  *              mdi_phci node
7312  */
7313 void *
7314 mdi_phci_get_vhci_private(dev_info_t *dip)
7315 {
7316         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7317         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7318                 mdi_phci_t      *ph;
7319                 ph = i_devi_get_phci(dip);
7320                 return (ph->ph_vprivate);
7321         }
7322         return (NULL);
7323 }
7324 
7325 /*
7326  * mdi_phci_set_vhci_private():
7327  *              Set the vhci private information in the mdi_phci node
7328  */
7329 void
7330 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7331 {
7332         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7333         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7334                 mdi_phci_t      *ph;
7335                 ph = i_devi_get_phci(dip);
7336                 ph->ph_vprivate = priv;
7337         }
7338 }
7339 
7340 int
7341 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7342 {
7343         return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7344 }
7345 
7346 int
7347 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7348 {
7349         return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7350 }
7351 
7352 /* Return 1 if all client paths are device_removed */
7353 static int
7354 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7355 {
7356         mdi_pathinfo_t  *pip;
7357         int             all_devices_removed = 1;
7358 
7359         MDI_CLIENT_LOCK(ct);
7360         for (pip = ct->ct_path_head; pip;
7361             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7362                 if (!mdi_pi_device_isremoved(pip)) {
7363                         all_devices_removed = 0;
7364                         break;
7365                 }
7366         }
7367         MDI_CLIENT_UNLOCK(ct);
7368         return (all_devices_removed);
7369 }
7370 
7371 /*
7372  * When processing path hotunplug, represent device removal.
7373  */
7374 int
7375 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7376 {
7377         mdi_client_t    *ct;
7378 
7379         MDI_PI_LOCK(pip);
7380         if (mdi_pi_device_isremoved(pip)) {
7381                 MDI_PI_UNLOCK(pip);
7382                 return (0);
7383         }
7384         MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7385         MDI_PI_FLAGS_SET_HIDDEN(pip);
7386         MDI_PI_UNLOCK(pip);
7387 
7388         /*
7389          * If all paths associated with the client are now DEVICE_REMOVED,
7390          * reflect DEVICE_REMOVED in the client.
7391          */
7392         ct = MDI_PI(pip)->pi_client;
7393         if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7394                 (void) ndi_devi_device_remove(ct->ct_dip);
7395         else
7396                 i_ddi_di_cache_invalidate();
7397 
7398         return (1);
7399 }
7400 
7401 /*
7402  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7403  * is now accessible then this interfaces is used to represent device insertion.
7404  */
7405 int
7406 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7407 {
7408         MDI_PI_LOCK(pip);
7409         if (!mdi_pi_device_isremoved(pip)) {
7410                 MDI_PI_UNLOCK(pip);
7411                 return (0);
7412         }
7413         MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7414         MDI_PI_FLAGS_CLR_HIDDEN(pip);
7415         MDI_PI_UNLOCK(pip);
7416 
7417         i_ddi_di_cache_invalidate();
7418 
7419         return (1);
7420 }
7421 
7422 /*
7423  * List of vhci class names:
7424  * A vhci class name must be in this list only if the corresponding vhci
7425  * driver intends to use the mdi provided bus config implementation
7426  * (i.e., mdi_vhci_bus_config()).
7427  */
7428 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7429 #define N_VHCI_CLASSES  (sizeof (vhci_class_list) / sizeof (char *))
7430 
7431 /*
7432  * During boot time, the on-disk vhci cache for every vhci class is read
7433  * in the form of an nvlist and stored here.
7434  */
7435 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7436 
7437 /* nvpair names in vhci cache nvlist */
7438 #define MDI_VHCI_CACHE_VERSION  1
7439 #define MDI_NVPNAME_VERSION     "version"
7440 #define MDI_NVPNAME_PHCIS       "phcis"
7441 #define MDI_NVPNAME_CTADDRMAP   "clientaddrmap"
7442 
7443 /*
7444  * Given vhci class name, return its on-disk vhci cache filename.
7445  * Memory for the returned filename which includes the full path is allocated
7446  * by this function.
7447  */
7448 static char *
7449 vhclass2vhcache_filename(char *vhclass)
7450 {
7451         char *filename;
7452         int len;
7453         static char *fmt = "/etc/devices/mdi_%s_cache";
7454 
7455         /*
7456          * fmt contains the on-disk vhci cache file name format;
7457          * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7458          */
7459 
7460         /* the -1 below is to account for "%s" in the format string */
7461         len = strlen(fmt) + strlen(vhclass) - 1;
7462         filename = kmem_alloc(len, KM_SLEEP);
7463         (void) snprintf(filename, len, fmt, vhclass);
7464         ASSERT(len == (strlen(filename) + 1));
7465         return (filename);
7466 }
7467 
/*
 * Initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 *
 * A zeroed mdi_vhci_config_t is allocated and attached to vh->vh_config,
 * its locks, condition variable and client hash are created, and, if an
 * nvlist holding cached data for this vhci class can be obtained, its
 * contents are copied into the in-core cache. Finally the callb that stops
 * the cache flush thread is registered and the path discovery tunables are
 * copied into the config structure.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
        mdi_vhci_config_t *vhc;
        mdi_vhci_cache_t *vhcache;
        int i;
        nvlist_t *nvl = NULL;

        vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
        vh->vh_config = vhc;
        vhcache = &vhc->vhc_vhcache;

        vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

        /* synchronization objects must exist before the cache is touched */
        mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

        rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

        /*
         * Create string hash; same as mod_hash_create_strhash() except that
         * we use NULL key destructor.
         */
        vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
            mdi_bus_config_cache_hash_size,
            mod_hash_null_keydtor, mod_hash_null_valdtor,
            mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

        /*
         * The on-disk vhci cache is read during booting prior to the
         * lights-out period by mdi_read_devices_files().
         * Take ownership of the nvlist saved for this class, if any, and
         * clear the slot so that it is consumed only once.
         */
        for (i = 0; i < N_VHCI_CLASSES; i++) {
                if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
                        nvl = vhcache_nvl[i];
                        vhcache_nvl[i] = NULL;
                        break;
                }
        }

        /*
         * this is to cover the case of some one manually causing unloading
         * (or detaching) and reloading (or attaching) of a vhci driver.
         */
        if (nvl == NULL && modrootloaded)
                nvl = read_on_disk_vhci_cache(vh->vh_class);

        if (nvl != NULL) {
                /* mainnvl_to_vhcache() requires the writer lock */
                rw_enter(&vhcache->vhcache_lock, RW_WRITER);
                if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
                        vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
                else  {
                        cmn_err(CE_WARN,
                            "%s: data file corrupted, will recreate",
                            vhc->vhc_vhcache_filename);
                }
                rw_exit(&vhcache->vhcache_lock);
                /* the nvlist is no longer needed, whether or not it was used */
                nvlist_free(nvl);
        }

        /* the returned id is what stop_vhcache_async_threads() deletes */
        vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
            CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

        vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
        vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}
7538 
7539 /*
7540  * free all vhci cache related resources
7541  */
7542 static int
7543 destroy_vhci_cache(mdi_vhci_t *vh)
7544 {
7545         mdi_vhci_config_t *vhc = vh->vh_config;
7546         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7547         mdi_vhcache_phci_t *cphci, *cphci_next;
7548         mdi_vhcache_client_t *cct, *cct_next;
7549         mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7550 
7551         if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7552                 return (MDI_FAILURE);
7553 
7554         kmem_free(vhc->vhc_vhcache_filename,
7555             strlen(vhc->vhc_vhcache_filename) + 1);
7556 
7557         mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7558 
7559         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7560             cphci = cphci_next) {
7561                 cphci_next = cphci->cphci_next;
7562                 free_vhcache_phci(cphci);
7563         }
7564 
7565         for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7566                 cct_next = cct->cct_next;
7567                 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7568                         cpi_next = cpi->cpi_next;
7569                         free_vhcache_pathinfo(cpi);
7570                 }
7571                 free_vhcache_client(cct);
7572         }
7573 
7574         rw_destroy(&vhcache->vhcache_lock);
7575 
7576         mutex_destroy(&vhc->vhc_lock);
7577         cv_destroy(&vhc->vhc_cv);
7578         kmem_free(vhc, sizeof (mdi_vhci_config_t));
7579         return (MDI_SUCCESS);
7580 }
7581 
/*
 * Stop all vhci cache related async threads and free their resources.
 *
 * MDI_VHC_EXIT is set and every waiter on vhc_cv is woken; the function
 * then polls until the flush thread flag is clear and vhc_acc_thrcount is
 * zero. After that the queued async client config requests are freed, a
 * dirty cache is flushed to disk and the callb registered under vhc_cbid
 * is deleted.
 *
 * Returns MDI_SUCCESS on success. Returns MDI_FAILURE if the flush or
 * callb_delete() fails; after a failed flush the cache is marked dirty
 * again through vhcache_dirty().
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
        mdi_async_client_config_t *acc, *acc_next;

        mutex_enter(&vhc->vhc_lock);
        vhc->vhc_flags |= MDI_VHC_EXIT;
        ASSERT(vhc->vhc_acc_thrcount >= 0);
        cv_broadcast(&vhc->vhc_cv);

        /*
         * Poll for the threads to go away; vhc_lock is dropped across the
         * delay so that the exiting threads can update the flags and count.
         */
        while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
            vhc->vhc_acc_thrcount != 0) {
                mutex_exit(&vhc->vhc_lock);
                delay_random(mdi_delay);
                mutex_enter(&vhc->vhc_lock);
        }

        vhc->vhc_flags &= ~MDI_VHC_EXIT;

        /* discard any async client config requests still queued */
        for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
                acc_next = acc->acc_next;
                free_async_client_config(acc);
        }
        vhc->vhc_acc_list_head = NULL;
        vhc->vhc_acc_list_tail = NULL;
        vhc->vhc_acc_count = 0;

        if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
                vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
                /* flush_vhcache() takes vhc_lock itself, so drop it first */
                mutex_exit(&vhc->vhc_lock);
                if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
                        /* the data did not reach disk; mark it dirty again */
                        vhcache_dirty(vhc);
                        return (MDI_FAILURE);
                }
        } else
                mutex_exit(&vhc->vhc_lock);

        if (callb_delete(vhc->vhc_cbid) != 0)
                return (MDI_FAILURE);

        return (MDI_SUCCESS);
}
7627 
/*
 * Stop vhci cache flush thread
 *
 * Callback registered with callb_add() by setup_vhci_cache(); arg is the
 * mdi_vhci_config_t of the vhci, code is unused. MDI_VHC_EXIT is set (and
 * is not cleared again here), the flush thread is waited for, and a dirty
 * cache is written out with the force flag set. Always returns B_TRUE.
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

        mutex_enter(&vhc->vhc_lock);
        vhc->vhc_flags |= MDI_VHC_EXIT;
        cv_broadcast(&vhc->vhc_cv);

        /*
         * Poll until the flush thread has cleared its flag; vhc_lock is
         * dropped across the delay so that the thread can make progress.
         */
        while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
                mutex_exit(&vhc->vhc_lock);
                delay_random(mdi_delay);
                mutex_enter(&vhc->vhc_lock);
        }

        if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
                vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
                mutex_exit(&vhc->vhc_lock);
                /* forced flush; the result is deliberately ignored */
                (void) flush_vhcache(vhc, 1);
        } else
                mutex_exit(&vhc->vhc_lock);

        return (B_TRUE);
}
7656 
7657 /*
7658  * Enqueue the vhcache phci (cphci) at the tail of the list
7659  */
7660 static void
7661 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7662 {
7663         cphci->cphci_next = NULL;
7664         if (vhcache->vhcache_phci_head == NULL)
7665                 vhcache->vhcache_phci_head = cphci;
7666         else
7667                 vhcache->vhcache_phci_tail->cphci_next = cphci;
7668         vhcache->vhcache_phci_tail = cphci;
7669 }
7670 
7671 /*
7672  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7673  */
7674 static void
7675 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7676     mdi_vhcache_pathinfo_t *cpi)
7677 {
7678         cpi->cpi_next = NULL;
7679         if (cct->cct_cpi_head == NULL)
7680                 cct->cct_cpi_head = cpi;
7681         else
7682                 cct->cct_cpi_tail->cpi_next = cpi;
7683         cct->cct_cpi_tail = cpi;
7684 }
7685 
7686 /*
7687  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7688  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7689  * flag set come at the beginning of the list. All cpis which have this
7690  * flag set come at the end of the list.
7691  */
7692 static void
7693 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7694     mdi_vhcache_pathinfo_t *newcpi)
7695 {
7696         mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7697 
7698         if (cct->cct_cpi_head == NULL ||
7699             (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7700                 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7701         else {
7702                 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7703                     !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7704                     prev_cpi = cpi, cpi = cpi->cpi_next)
7705                         ;
7706 
7707                 if (prev_cpi == NULL)
7708                         cct->cct_cpi_head = newcpi;
7709                 else
7710                         prev_cpi->cpi_next = newcpi;
7711 
7712                 newcpi->cpi_next = cpi;
7713 
7714                 if (cpi == NULL)
7715                         cct->cct_cpi_tail = newcpi;
7716         }
7717 }
7718 
7719 /*
7720  * Enqueue the vhcache client (cct) at the tail of the list
7721  */
7722 static void
7723 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7724     mdi_vhcache_client_t *cct)
7725 {
7726         cct->cct_next = NULL;
7727         if (vhcache->vhcache_client_head == NULL)
7728                 vhcache->vhcache_client_head = cct;
7729         else
7730                 vhcache->vhcache_client_tail->cct_next = cct;
7731         vhcache->vhcache_client_tail = cct;
7732 }
7733 
/*
 * Free an array of nelem strings along with the array itself. Each string
 * is freed using its own length plus one; NULL elements are skipped and a
 * NULL array pointer is a no-op.
 */
static void
free_string_array(char **str, int nelem)
{
        int idx = 0;

        if (str == NULL)
                return;

        while (idx < nelem) {
                char *elem = str[idx++];

                if (elem != NULL)
                        kmem_free(elem, strlen(elem) + 1);
        }

        kmem_free(str, sizeof (char *) * nelem);
}
7747 
7748 static void
7749 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7750 {
7751         kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7752         kmem_free(cphci, sizeof (*cphci));
7753 }
7754 
7755 static void
7756 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7757 {
7758         kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7759         kmem_free(cpi, sizeof (*cpi));
7760 }
7761 
7762 static void
7763 free_vhcache_client(mdi_vhcache_client_t *cct)
7764 {
7765         kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7766         kmem_free(cct, sizeof (*cct));
7767 }
7768 
7769 static char *
7770 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7771 {
7772         char *name_addr;
7773         int len;
7774 
7775         len = strlen(ct_name) + strlen(ct_addr) + 2;
7776         name_addr = kmem_alloc(len, KM_SLEEP);
7777         (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7778 
7779         if (ret_len)
7780                 *ret_len = len;
7781         return (name_addr);
7782 }
7783 
7784 /*
7785  * Copy the contents of paddrnvl to vhci cache.
7786  * paddrnvl nvlist contains path information for a vhci client.
7787  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7788  */
7789 static void
7790 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7791     mdi_vhcache_client_t *cct)
7792 {
7793         nvpair_t *nvp = NULL;
7794         mdi_vhcache_pathinfo_t *cpi;
7795         uint_t nelem;
7796         uint32_t *val;
7797 
7798         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7799                 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7800                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7801                 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7802                 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7803                 ASSERT(nelem == 2);
7804                 cpi->cpi_cphci = cphci_list[val[0]];
7805                 cpi->cpi_flags = val[1];
7806                 enqueue_tail_vhcache_pathinfo(cct, cpi);
7807         }
7808 }
7809 
7810 /*
7811  * Copy the contents of caddrmapnvl to vhci cache.
7812  * caddrmapnvl nvlist contains vhci client address to phci client address
7813  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7814  * this nvlist.
7815  */
7816 static void
7817 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7818     mdi_vhcache_phci_t *cphci_list[])
7819 {
7820         nvpair_t *nvp = NULL;
7821         nvlist_t *paddrnvl;
7822         mdi_vhcache_client_t *cct;
7823 
7824         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7825                 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7826                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7827                 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7828                 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7829                 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7830                 /* the client must contain at least one path */
7831                 ASSERT(cct->cct_cpi_head != NULL);
7832 
7833                 enqueue_vhcache_client(vhcache, cct);
7834                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7835                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7836         }
7837 }
7838 
7839 /*
7840  * Copy the contents of the main nvlist to vhci cache.
7841  *
7842  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7843  * The nvlist contains the mappings between the vhci client addresses and
7844  * their corresponding phci client addresses.
7845  *
7846  * The structure of the nvlist is as follows:
7847  *
7848  * Main nvlist:
7849  *      NAME            TYPE            DATA
7850  *      version         int32           version number
7851  *      phcis           string array    array of phci paths
7852  *      clientaddrmap   nvlist_t        c2paddrs_nvl (see below)
7853  *
7854  * structure of c2paddrs_nvl:
7855  *      NAME            TYPE            DATA
7856  *      caddr1          nvlist_t        paddrs_nvl1
7857  *      caddr2          nvlist_t        paddrs_nvl2
7858  *      ...
7859  * where caddr1, caddr2, ... are vhci client name and addresses in the
7860  * form of "<clientname>@<clientaddress>".
7861  * (for example: "ssd@2000002037cd9f72");
7862  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7863  *
7864  * structure of paddrs_nvl:
7865  *      NAME            TYPE            DATA
7866  *      pi_addr1        uint32_array    (phci-id, cpi_flags)
7867  *      pi_addr2        uint32_array    (phci-id, cpi_flags)
7868  *      ...
7869  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7870  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7871  * phci-ids are integers that identify pHCIs to which the
7872  * the bus specific address belongs to. These integers are used as an index
7873  * into to the phcis string array in the main nvlist to get the pHCI path.
7874  */
7875 static int
7876 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7877 {
7878         char **phcis, **phci_namep;
7879         uint_t nphcis;
7880         mdi_vhcache_phci_t *cphci, **cphci_list;
7881         nvlist_t *caddrmapnvl;
7882         int32_t ver;
7883         int i;
7884         size_t cphci_list_size;
7885 
7886         ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7887 
7888         if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7889             ver != MDI_VHCI_CACHE_VERSION)
7890                 return (MDI_FAILURE);
7891 
7892         if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7893             &nphcis) != 0)
7894                 return (MDI_SUCCESS);
7895 
7896         ASSERT(nphcis > 0);
7897 
7898         cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7899         cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7900         for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7901                 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7902                 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7903                 enqueue_vhcache_phci(vhcache, cphci);
7904                 cphci_list[i] = cphci;
7905         }
7906 
7907         ASSERT(vhcache->vhcache_phci_head != NULL);
7908 
7909         if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7910                 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7911 
7912         kmem_free(cphci_list, cphci_list_size);
7913         return (MDI_SUCCESS);
7914 }
7915 
7916 /*
7917  * Build paddrnvl for the specified client using the information in the
7918  * vhci cache and add it to the caddrmapnnvl.
7919  * Returns 0 on success, errno on failure.
7920  */
7921 static int
7922 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7923     nvlist_t *caddrmapnvl)
7924 {
7925         mdi_vhcache_pathinfo_t *cpi;
7926         nvlist_t *nvl;
7927         int err;
7928         uint32_t val[2];
7929 
7930         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7931 
7932         if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7933                 return (err);
7934 
7935         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7936                 val[0] = cpi->cpi_cphci->cphci_id;
7937                 val[1] = cpi->cpi_flags;
7938                 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7939                     != 0)
7940                         goto out;
7941         }
7942 
7943         err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7944 out:
7945         nvlist_free(nvl);
7946         return (err);
7947 }
7948 
7949 /*
7950  * Build caddrmapnvl using the information in the vhci cache
7951  * and add it to the mainnvl.
7952  * Returns 0 on success, errno on failure.
7953  */
7954 static int
7955 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7956 {
7957         mdi_vhcache_client_t *cct;
7958         nvlist_t *nvl;
7959         int err;
7960 
7961         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7962 
7963         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7964                 return (err);
7965 
7966         for (cct = vhcache->vhcache_client_head; cct != NULL;
7967             cct = cct->cct_next) {
7968                 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7969                         goto out;
7970         }
7971 
7972         err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7973 out:
7974         nvlist_free(nvl);
7975         return (err);
7976 }
7977 
7978 /*
7979  * Build nvlist using the information in the vhci cache.
7980  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7981  * Returns nvl on success, NULL on failure.
7982  */
7983 static nvlist_t *
7984 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7985 {
7986         mdi_vhcache_phci_t *cphci;
7987         uint_t phci_count;
7988         char **phcis;
7989         nvlist_t *nvl;
7990         int err, i;
7991 
7992         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7993                 nvl = NULL;
7994                 goto out;
7995         }
7996 
7997         if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7998             MDI_VHCI_CACHE_VERSION)) != 0)
7999                 goto out;
8000 
8001         rw_enter(&vhcache->vhcache_lock, RW_READER);
8002         if (vhcache->vhcache_phci_head == NULL) {
8003                 rw_exit(&vhcache->vhcache_lock);
8004                 return (nvl);
8005         }
8006 
8007         phci_count = 0;
8008         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8009             cphci = cphci->cphci_next)
8010                 cphci->cphci_id = phci_count++;
8011 
8012         /* build phci pathname list */
8013         phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8014         for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8015             cphci = cphci->cphci_next, i++)
8016                 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8017 
8018         err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8019             phci_count);
8020         free_string_array(phcis, phci_count);
8021 
8022         if (err == 0 &&
8023             (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8024                 rw_exit(&vhcache->vhcache_lock);
8025                 return (nvl);
8026         }
8027 
8028         rw_exit(&vhcache->vhcache_lock);
8029 out:
8030         nvlist_free(nvl);
8031         return (NULL);
8032 }
8033 
8034 /*
8035  * Lookup vhcache phci structure for the specified phci path.
8036  */
8037 static mdi_vhcache_phci_t *
8038 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8039 {
8040         mdi_vhcache_phci_t *cphci;
8041 
8042         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8043 
8044         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8045             cphci = cphci->cphci_next) {
8046                 if (strcmp(cphci->cphci_path, phci_path) == 0)
8047                         return (cphci);
8048         }
8049 
8050         return (NULL);
8051 }
8052 
8053 /*
8054  * Lookup vhcache phci structure for the specified phci.
8055  */
8056 static mdi_vhcache_phci_t *
8057 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8058 {
8059         mdi_vhcache_phci_t *cphci;
8060 
8061         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8062 
8063         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8064             cphci = cphci->cphci_next) {
8065                 if (cphci->cphci_phci == ph)
8066                         return (cphci);
8067         }
8068 
8069         return (NULL);
8070 }
8071 
8072 /*
8073  * Add the specified phci to the vhci cache if not already present.
8074  */
8075 static void
8076 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8077 {
8078         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8079         mdi_vhcache_phci_t *cphci;
8080         char *pathname;
8081         int cache_updated;
8082 
8083         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8084 
8085         pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8086         (void) ddi_pathname(ph->ph_dip, pathname);
8087         if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8088             != NULL) {
8089                 cphci->cphci_phci = ph;
8090                 cache_updated = 0;
8091         } else {
8092                 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8093                 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8094                 cphci->cphci_phci = ph;
8095                 enqueue_vhcache_phci(vhcache, cphci);
8096                 cache_updated = 1;
8097         }
8098 
8099         rw_exit(&vhcache->vhcache_lock);
8100 
8101         /*
8102          * Since a new phci has been added, reset
8103          * vhc_path_discovery_cutoff_time to allow for discovery of paths
8104          * during next vhcache_discover_paths().
8105          */
8106         mutex_enter(&vhc->vhc_lock);
8107         vhc->vhc_path_discovery_cutoff_time = 0;
8108         mutex_exit(&vhc->vhc_lock);
8109 
8110         kmem_free(pathname, MAXPATHLEN);
8111         if (cache_updated)
8112                 vhcache_dirty(vhc);
8113 }
8114 
8115 /*
8116  * Remove the reference to the specified phci from the vhci cache.
8117  */
8118 static void
8119 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8120 {
8121         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8122         mdi_vhcache_phci_t *cphci;
8123 
8124         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8125         if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8126                 /* do not remove the actual mdi_vhcache_phci structure */
8127                 cphci->cphci_phci = NULL;
8128         }
8129         rw_exit(&vhcache->vhcache_lock);
8130 }
8131 
8132 static void
8133 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8134     mdi_vhcache_lookup_token_t *src)
8135 {
8136         if (src == NULL) {
8137                 dst->lt_cct = NULL;
8138                 dst->lt_cct_lookup_time = 0;
8139         } else {
8140                 dst->lt_cct = src->lt_cct;
8141                 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8142         }
8143 }
8144 
8145 /*
8146  * Look up vhcache client for the specified client.
8147  */
8148 static mdi_vhcache_client_t *
8149 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8150     mdi_vhcache_lookup_token_t *token)
8151 {
8152         mod_hash_val_t hv;
8153         char *name_addr;
8154         int len;
8155 
8156         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8157 
8158         /*
8159          * If no vhcache clean occurred since the last lookup, we can
8160          * simply return the cct from the last lookup operation.
8161          * It works because ccts are never freed except during the vhcache
8162          * cleanup operation.
8163          */
8164         if (token != NULL &&
8165             vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8166                 return (token->lt_cct);
8167 
8168         name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8169         if (mod_hash_find(vhcache->vhcache_client_hash,
8170             (mod_hash_key_t)name_addr, &hv) == 0) {
8171                 if (token) {
8172                         token->lt_cct = (mdi_vhcache_client_t *)hv;
8173                         token->lt_cct_lookup_time = ddi_get_lbolt64();
8174                 }
8175         } else {
8176                 if (token) {
8177                         token->lt_cct = NULL;
8178                         token->lt_cct_lookup_time = 0;
8179                 }
8180                 hv = NULL;
8181         }
8182         kmem_free(name_addr, len);
8183         return ((mdi_vhcache_client_t *)hv);
8184 }
8185 
8186 /*
8187  * Add the specified path to the vhci cache if not already present.
8188  * Also add the vhcache client for the client corresponding to this path
8189  * if it doesn't already exist.
8190  */
8191 static void
8192 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8193 {
8194         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8195         mdi_vhcache_client_t *cct;
8196         mdi_vhcache_pathinfo_t *cpi;
8197         mdi_phci_t *ph = pip->pi_phci;
8198         mdi_client_t *ct = pip->pi_client;
8199         int cache_updated = 0;
8200 
8201         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8202 
8203         /* if vhcache client for this pip doesn't already exist, add it */
8204         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8205             NULL)) == NULL) {
8206                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8207                 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8208                     ct->ct_guid, NULL);
8209                 enqueue_vhcache_client(vhcache, cct);
8210                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
8211                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8212                 cache_updated = 1;
8213         }
8214 
8215         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8216                 if (cpi->cpi_cphci->cphci_phci == ph &&
8217                     strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8218                         cpi->cpi_pip = pip;
8219                         if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8220                                 cpi->cpi_flags &=
8221                                     ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8222                                 sort_vhcache_paths(cct);
8223                                 cache_updated = 1;
8224                         }
8225                         break;
8226                 }
8227         }
8228 
8229         if (cpi == NULL) {
8230                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8231                 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8232                 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8233                 ASSERT(cpi->cpi_cphci != NULL);
8234                 cpi->cpi_pip = pip;
8235                 enqueue_vhcache_pathinfo(cct, cpi);
8236                 cache_updated = 1;
8237         }
8238 
8239         rw_exit(&vhcache->vhcache_lock);
8240 
8241         if (cache_updated)
8242                 vhcache_dirty(vhc);
8243 }
8244 
8245 /*
8246  * Remove the reference to the specified path from the vhci cache.
8247  */
8248 static void
8249 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8250 {
8251         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8252         mdi_client_t *ct = pip->pi_client;
8253         mdi_vhcache_client_t *cct;
8254         mdi_vhcache_pathinfo_t *cpi;
8255 
8256         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8257         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8258             NULL)) != NULL) {
8259                 for (cpi = cct->cct_cpi_head; cpi != NULL;
8260                     cpi = cpi->cpi_next) {
8261                         if (cpi->cpi_pip == pip) {
8262                                 cpi->cpi_pip = NULL;
8263                                 break;
8264                         }
8265                 }
8266         }
8267         rw_exit(&vhcache->vhcache_lock);
8268 }
8269 
8270 /*
8271  * Flush the vhci cache to disk.
8272  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8273  */
8274 static int
8275 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8276 {
8277         nvlist_t *nvl;
8278         int err;
8279         int rv;
8280 
8281         /*
8282          * It is possible that the system may shutdown before
8283          * i_ddi_io_initialized (during stmsboot for example). To allow for
8284          * flushing the cache in this case do not check for
8285          * i_ddi_io_initialized when force flag is set.
8286          */
8287         if (force_flag == 0 && !i_ddi_io_initialized())
8288                 return (MDI_FAILURE);
8289 
8290         if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8291                 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8292                 nvlist_free(nvl);
8293         } else
8294                 err = EFAULT;
8295 
8296         rv = MDI_SUCCESS;
8297         mutex_enter(&vhc->vhc_lock);
8298         if (err != 0) {
8299                 if (err == EROFS) {
8300                         vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8301                         vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8302                             MDI_VHC_VHCACHE_DIRTY);
8303                 } else {
8304                         if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8305                                 cmn_err(CE_CONT, "%s: update failed\n",
8306                                     vhc->vhc_vhcache_filename);
8307                                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8308                         }
8309                         rv = MDI_FAILURE;
8310                 }
8311         } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8312                 cmn_err(CE_CONT,
8313                     "%s: update now ok\n", vhc->vhc_vhcache_filename);
8314                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8315         }
8316         mutex_exit(&vhc->vhc_lock);
8317 
8318         return (rv);
8319 }
8320 
/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 *
 * arg is the mdi_vhci_config_t of the vhci. The thread runs with vhc_lock
 * held except while flushing or waiting on vhc_cv, and clears
 * MDI_VHC_VHCACHE_FLUSH_THREAD in vhc_flags just before it exits.
 */
static void
vhcache_flush_thread(void *arg)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
        clock_t idle_time, quit_at_ticks;
        callb_cpr_t cprinfo;

        /* number of seconds to sleep idle before exiting */
        idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

        CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
            "mdi_vhcache_flush");
        mutex_enter(&vhc->vhc_lock);
        for (; ; ) {
                /*
                 * While the cache is dirty and no exit has been requested:
                 * wait until the scheduled flush time, then flush.
                 */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
                        if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
                                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                                (void) cv_timedwait(&vhc->vhc_cv,
                                    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
                                CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                        } else {
                                /*
                                 * Clear the dirty flag before dropping the
                                 * lock for the flush.
                                 */
                                vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
                                mutex_exit(&vhc->vhc_lock);

                                /* a failed flush re-marks the cache dirty */
                                if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
                                        vhcache_dirty(vhc);

                                mutex_enter(&vhc->vhc_lock);
                        }
                }

                quit_at_ticks = ddi_get_lbolt() + idle_time;

                /*
                 * Idle until the cache becomes dirty again, an exit is
                 * requested, or the idle timeout expires.
                 */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
                    ddi_get_lbolt() < quit_at_ticks) {
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                            quit_at_ticks);
                        CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                }

                /* leave on exit request or idle timeout; else loop to flush */
                if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
                        goto out;
        }

out:
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
        /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
        CALLB_CPR_EXIT(&cprinfo);
}
8378 
8379 /*
8380  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8381  */
8382 static void
8383 vhcache_dirty(mdi_vhci_config_t *vhc)
8384 {
8385         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8386         int create_thread;
8387 
8388         rw_enter(&vhcache->vhcache_lock, RW_READER);
8389         /* do not flush cache until the cache is fully built */
8390         if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8391                 rw_exit(&vhcache->vhcache_lock);
8392                 return;
8393         }
8394         rw_exit(&vhcache->vhcache_lock);
8395 
8396         mutex_enter(&vhc->vhc_lock);
8397         if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8398                 mutex_exit(&vhc->vhc_lock);
8399                 return;
8400         }
8401 
8402         vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8403         vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8404             mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8405         if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8406                 cv_broadcast(&vhc->vhc_cv);
8407                 create_thread = 0;
8408         } else {
8409                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8410                 create_thread = 1;
8411         }
8412         mutex_exit(&vhc->vhc_lock);
8413 
8414         if (create_thread)
8415                 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8416                     0, &p0, TS_RUN, minclsyspri);
8417 }
8418 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
        /* device path of the phci to configure */
        char *phbc_phci_path;

        /* vhci bus config operation this request belongs to */
        struct mdi_vhci_bus_config_s *phbc_vhbusconfig;

        /* next request on the singly linked list */
        struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
8428 
8429 /* vhci bus config structure - one for each vhci bus config operation */
8430 typedef struct mdi_vhci_bus_config_s {
8431         ddi_bus_config_op_t vhbc_op;    /* bus config op */
8432         major_t vhbc_op_major;          /* bus config op major */
8433         uint_t vhbc_op_flags;           /* bus config op flags */
8434         kmutex_t vhbc_lock;
8435         kcondvar_t vhbc_cv;
8436         int vhbc_thr_count;
8437 } mdi_vhci_bus_config_t;
8438 
8439 /*
8440  * bus config the specified phci
8441  */
8442 static void
8443 bus_config_phci(void *arg)
8444 {
8445         mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8446         mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8447         dev_info_t *ph_dip;
8448 
8449         /*
8450          * first configure all path components upto phci and then configure
8451          * the phci children.
8452          */
8453         if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8454             != NULL) {
8455                 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8456                     vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8457                         (void) ndi_devi_config_driver(ph_dip,
8458                             vhbc->vhbc_op_flags,
8459                             vhbc->vhbc_op_major);
8460                 } else
8461                         (void) ndi_devi_config(ph_dip,
8462                             vhbc->vhbc_op_flags);
8463 
8464                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8465                 ndi_rele_devi(ph_dip);
8466         }
8467 
8468         kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8469         kmem_free(phbc, sizeof (*phbc));
8470 
8471         mutex_enter(&vhbc->vhbc_lock);
8472         vhbc->vhbc_thr_count--;
8473         if (vhbc->vhbc_thr_count == 0)
8474                 cv_broadcast(&vhbc->vhbc_cv);
8475         mutex_exit(&vhbc->vhbc_lock);
8476 }
8477 
8478 /*
8479  * Bus config all phcis associated with the vhci in parallel.
8480  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8481  */
8482 static void
8483 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8484     ddi_bus_config_op_t op, major_t maj)
8485 {
8486         mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8487         mdi_vhci_bus_config_t *vhbc;
8488         mdi_vhcache_phci_t *cphci;
8489 
8490         rw_enter(&vhcache->vhcache_lock, RW_READER);
8491         if (vhcache->vhcache_phci_head == NULL) {
8492                 rw_exit(&vhcache->vhcache_lock);
8493                 return;
8494         }
8495 
8496         vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8497 
8498         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8499             cphci = cphci->cphci_next) {
8500                 /* skip phcis that haven't attached before root is available */
8501                 if (!modrootloaded && (cphci->cphci_phci == NULL))
8502                         continue;
8503                 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8504                 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8505                     KM_SLEEP);
8506                 phbc->phbc_vhbusconfig = vhbc;
8507                 phbc->phbc_next = phbc_head;
8508                 phbc_head = phbc;
8509                 vhbc->vhbc_thr_count++;
8510         }
8511         rw_exit(&vhcache->vhcache_lock);
8512 
8513         vhbc->vhbc_op = op;
8514         vhbc->vhbc_op_major = maj;
8515         vhbc->vhbc_op_flags = NDI_NO_EVENT |
8516             (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8517         mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8518         cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8519 
8520         /* now create threads to initiate bus config on all phcis in parallel */
8521         for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8522                 phbc_next = phbc->phbc_next;
8523                 if (mdi_mtc_off)
8524                         bus_config_phci((void *)phbc);
8525                 else
8526                         (void) thread_create(NULL, 0, bus_config_phci, phbc,
8527                             0, &p0, TS_RUN, minclsyspri);
8528         }
8529 
8530         mutex_enter(&vhbc->vhbc_lock);
8531         /* wait until all threads exit */
8532         while (vhbc->vhbc_thr_count > 0)
8533                 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8534         mutex_exit(&vhbc->vhbc_lock);
8535 
8536         mutex_destroy(&vhbc->vhbc_lock);
8537         cv_destroy(&vhbc->vhbc_cv);
8538         kmem_free(vhbc, sizeof (*vhbc));
8539 }
8540 
8541 /*
8542  * Single threaded version of bus_config_all_phcis()
8543  */
8544 static void
8545 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8546     ddi_bus_config_op_t op, major_t maj)
8547 {
8548         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8549 
8550         single_threaded_vhconfig_enter(vhc);
8551         bus_config_all_phcis(vhcache, flags, op, maj);
8552         single_threaded_vhconfig_exit(vhc);
8553 }
8554 
8555 /*
8556  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8557  * The path includes the child component in addition to the phci path.
8558  */
8559 static int
8560 bus_config_one_phci_child(char *path)
8561 {
8562         dev_info_t *ph_dip, *child;
8563         char *devnm;
8564         int rv = MDI_FAILURE;
8565 
8566         /* extract the child component of the phci */
8567         devnm = strrchr(path, '/');
8568         *devnm++ = '\0';
8569 
8570         /*
8571          * first configure all path components upto phci and then
8572          * configure the phci child.
8573          */
8574         if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8575                 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8576                     NDI_SUCCESS) {
8577                         /*
8578                          * release the hold that ndi_devi_config_one() placed
8579                          */
8580                         ndi_rele_devi(child);
8581                         rv = MDI_SUCCESS;
8582                 }
8583 
8584                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8585                 ndi_rele_devi(ph_dip);
8586         }
8587 
8588         devnm--;
8589         *devnm = '/';
8590         return (rv);
8591 }
8592 
8593 /*
8594  * Build a list of phci client paths for the specified vhci client.
8595  * The list includes only those phci client paths which aren't configured yet.
8596  */
8597 static mdi_phys_path_t *
8598 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8599 {
8600         mdi_vhcache_pathinfo_t *cpi;
8601         mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8602         int config_path, len;
8603 
8604         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8605                 /*
8606                  * include only those paths that aren't configured.
8607                  */
8608                 config_path = 0;
8609                 if (cpi->cpi_pip == NULL)
8610                         config_path = 1;
8611                 else {
8612                         MDI_PI_LOCK(cpi->cpi_pip);
8613                         if (MDI_PI_IS_INIT(cpi->cpi_pip))
8614                                 config_path = 1;
8615                         MDI_PI_UNLOCK(cpi->cpi_pip);
8616                 }
8617 
8618                 if (config_path) {
8619                         pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8620                         len = strlen(cpi->cpi_cphci->cphci_path) +
8621                             strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8622                         pp->phys_path = kmem_alloc(len, KM_SLEEP);
8623                         (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8624                             cpi->cpi_cphci->cphci_path, ct_name,
8625                             cpi->cpi_addr);
8626                         pp->phys_path_next = NULL;
8627 
8628                         if (pp_head == NULL)
8629                                 pp_head = pp;
8630                         else
8631                                 pp_tail->phys_path_next = pp;
8632                         pp_tail = pp;
8633                 }
8634         }
8635 
8636         return (pp_head);
8637 }
8638 
8639 /*
8640  * Free the memory allocated for phci client path list.
8641  */
8642 static void
8643 free_phclient_path_list(mdi_phys_path_t *pp_head)
8644 {
8645         mdi_phys_path_t *pp, *pp_next;
8646 
8647         for (pp = pp_head; pp != NULL; pp = pp_next) {
8648                 pp_next = pp->phys_path_next;
8649                 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8650                 kmem_free(pp, sizeof (*pp));
8651         }
8652 }
8653 
8654 /*
8655  * Allocated async client structure and initialize with the specified values.
8656  */
8657 static mdi_async_client_config_t *
8658 alloc_async_client_config(char *ct_name, char *ct_addr,
8659     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8660 {
8661         mdi_async_client_config_t *acc;
8662 
8663         acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8664         acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8665         acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8666         acc->acc_phclient_path_list_head = pp_head;
8667         init_vhcache_lookup_token(&acc->acc_token, tok);
8668         acc->acc_next = NULL;
8669         return (acc);
8670 }
8671 
8672 /*
8673  * Free the memory allocated for the async client structure and their members.
8674  */
8675 static void
8676 free_async_client_config(mdi_async_client_config_t *acc)
8677 {
8678         if (acc->acc_phclient_path_list_head)
8679                 free_phclient_path_list(acc->acc_phclient_path_list_head);
8680         kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8681         kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8682         kmem_free(acc, sizeof (*acc));
8683 }
8684 
8685 /*
8686  * Sort vhcache pathinfos (cpis) of the specified client.
8687  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8688  * flag set come at the beginning of the list. All cpis which have this
8689  * flag set come at the end of the list.
8690  */
8691 static void
8692 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8693 {
8694         mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8695 
8696         cpi_head = cct->cct_cpi_head;
8697         cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8698         for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8699                 cpi_next = cpi->cpi_next;
8700                 enqueue_vhcache_pathinfo(cct, cpi);
8701         }
8702 }
8703 
8704 /*
8705  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8706  * every vhcache pathinfo of the specified client. If not adjust the flag
8707  * setting appropriately.
8708  *
8709  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8710  * on-disk vhci cache. So every time this flag is updated the cache must be
8711  * flushed.
8712  */
8713 static void
8714 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8715     mdi_vhcache_lookup_token_t *tok)
8716 {
8717         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8718         mdi_vhcache_client_t *cct;
8719         mdi_vhcache_pathinfo_t *cpi;
8720 
8721         rw_enter(&vhcache->vhcache_lock, RW_READER);
8722         if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8723             == NULL) {
8724                 rw_exit(&vhcache->vhcache_lock);
8725                 return;
8726         }
8727 
8728         /*
8729          * to avoid unnecessary on-disk cache updates, first check if an
8730          * update is really needed. If no update is needed simply return.
8731          */
8732         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8733                 if ((cpi->cpi_pip != NULL &&
8734                     (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8735                     (cpi->cpi_pip == NULL &&
8736                     !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8737                         break;
8738                 }
8739         }
8740         if (cpi == NULL) {
8741                 rw_exit(&vhcache->vhcache_lock);
8742                 return;
8743         }
8744 
8745         if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8746                 rw_exit(&vhcache->vhcache_lock);
8747                 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8748                 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8749                     tok)) == NULL) {
8750                         rw_exit(&vhcache->vhcache_lock);
8751                         return;
8752                 }
8753         }
8754 
8755         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8756                 if (cpi->cpi_pip != NULL)
8757                         cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8758                 else
8759                         cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8760         }
8761         sort_vhcache_paths(cct);
8762 
8763         rw_exit(&vhcache->vhcache_lock);
8764         vhcache_dirty(vhc);
8765 }
8766 
8767 /*
8768  * Configure all specified paths of the client.
8769  */
8770 static void
8771 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8772     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8773 {
8774         mdi_phys_path_t *pp;
8775 
8776         for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8777                 (void) bus_config_one_phci_child(pp->phys_path);
8778         adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8779 }
8780 
8781 /*
8782  * Dequeue elements from vhci async client config list and bus configure
8783  * their corresponding phci clients.
8784  */
8785 static void
8786 config_client_paths_thread(void *arg)
8787 {
8788         mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8789         mdi_async_client_config_t *acc;
8790         clock_t quit_at_ticks;
8791         clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8792         callb_cpr_t cprinfo;
8793 
8794         CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8795             "mdi_config_client_paths");
8796 
8797         for (; ; ) {
8798                 quit_at_ticks = ddi_get_lbolt() + idle_time;
8799 
8800                 mutex_enter(&vhc->vhc_lock);
8801                 while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8802                     vhc->vhc_acc_list_head == NULL &&
8803                     ddi_get_lbolt() < quit_at_ticks) {
8804                         CALLB_CPR_SAFE_BEGIN(&cprinfo);
8805                         (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8806                             quit_at_ticks);
8807                         CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8808                 }
8809 
8810                 if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8811                     vhc->vhc_acc_list_head == NULL)
8812                         goto out;
8813 
8814                 acc = vhc->vhc_acc_list_head;
8815                 vhc->vhc_acc_list_head = acc->acc_next;
8816                 if (vhc->vhc_acc_list_head == NULL)
8817                         vhc->vhc_acc_list_tail = NULL;
8818                 vhc->vhc_acc_count--;
8819                 mutex_exit(&vhc->vhc_lock);
8820 
8821                 config_client_paths_sync(vhc, acc->acc_ct_name,
8822                     acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8823                     &acc->acc_token);
8824 
8825                 free_async_client_config(acc);
8826         }
8827 
8828 out:
8829         vhc->vhc_acc_thrcount--;
8830         /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8831         CALLB_CPR_EXIT(&cprinfo);
8832 }
8833 
8834 /*
8835  * Arrange for all the phci client paths (pp_head) for the specified client
8836  * to be bus configured asynchronously by a thread.
8837  */
8838 static void
8839 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8840     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8841 {
8842         mdi_async_client_config_t *acc, *newacc;
8843         int create_thread;
8844 
8845         if (pp_head == NULL)
8846                 return;
8847 
8848         if (mdi_mtc_off) {
8849                 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8850                 free_phclient_path_list(pp_head);
8851                 return;
8852         }
8853 
8854         newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8855         ASSERT(newacc);
8856 
8857         mutex_enter(&vhc->vhc_lock);
8858         for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8859                 if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8860                     strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8861                         free_async_client_config(newacc);
8862                         mutex_exit(&vhc->vhc_lock);
8863                         return;
8864                 }
8865         }
8866 
8867         if (vhc->vhc_acc_list_head == NULL)
8868                 vhc->vhc_acc_list_head = newacc;
8869         else
8870                 vhc->vhc_acc_list_tail->acc_next = newacc;
8871         vhc->vhc_acc_list_tail = newacc;
8872         vhc->vhc_acc_count++;
8873         if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8874                 cv_broadcast(&vhc->vhc_cv);
8875                 create_thread = 0;
8876         } else {
8877                 vhc->vhc_acc_thrcount++;
8878                 create_thread = 1;
8879         }
8880         mutex_exit(&vhc->vhc_lock);
8881 
8882         if (create_thread)
8883                 (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8884                     0, &p0, TS_RUN, minclsyspri);
8885 }
8886 
8887 /*
8888  * Return number of online paths for the specified client.
8889  */
8890 static int
8891 nonline_paths(mdi_vhcache_client_t *cct)
8892 {
8893         mdi_vhcache_pathinfo_t *cpi;
8894         int online_count = 0;
8895 
8896         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8897                 if (cpi->cpi_pip != NULL) {
8898                         MDI_PI_LOCK(cpi->cpi_pip);
8899                         if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8900                                 online_count++;
8901                         MDI_PI_UNLOCK(cpi->cpi_pip);
8902                 }
8903         }
8904 
8905         return (online_count);
8906 }
8907 
8908 /*
8909  * Bus configure all paths for the specified vhci client.
8910  * If at least one path for the client is already online, the remaining paths
8911  * will be configured asynchronously. Otherwise, it synchronously configures
8912  * the paths until at least one path is online and then rest of the paths
8913  * will be configured asynchronously.
8914  */
8915 static void
8916 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8917 {
8918         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8919         mdi_phys_path_t *pp_head, *pp;
8920         mdi_vhcache_client_t *cct;
8921         mdi_vhcache_lookup_token_t tok;
8922 
8923         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8924 
8925         init_vhcache_lookup_token(&tok, NULL);
8926 
8927         if (ct_name == NULL || ct_addr == NULL ||
8928             (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8929             == NULL ||
8930             (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8931                 rw_exit(&vhcache->vhcache_lock);
8932                 return;
8933         }
8934 
8935         /* if at least one path is online, configure the rest asynchronously */
8936         if (nonline_paths(cct) > 0) {
8937                 rw_exit(&vhcache->vhcache_lock);
8938                 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8939                 return;
8940         }
8941 
8942         rw_exit(&vhcache->vhcache_lock);
8943 
8944         for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8945                 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8946                         rw_enter(&vhcache->vhcache_lock, RW_READER);
8947 
8948                         if ((cct = lookup_vhcache_client(vhcache, ct_name,
8949                             ct_addr, &tok)) == NULL) {
8950                                 rw_exit(&vhcache->vhcache_lock);
8951                                 goto out;
8952                         }
8953 
8954                         if (nonline_paths(cct) > 0 &&
8955                             pp->phys_path_next != NULL) {
8956                                 rw_exit(&vhcache->vhcache_lock);
8957                                 config_client_paths_async(vhc, ct_name, ct_addr,
8958                                     pp->phys_path_next, &tok);
8959                                 pp->phys_path_next = NULL;
8960                                 goto out;
8961                         }
8962 
8963                         rw_exit(&vhcache->vhcache_lock);
8964                 }
8965         }
8966 
8967         adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8968 out:
8969         free_phclient_path_list(pp_head);
8970 }
8971 
8972 static void
8973 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8974 {
8975         mutex_enter(&vhc->vhc_lock);
8976         while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8977                 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8978         vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8979         mutex_exit(&vhc->vhc_lock);
8980 }
8981 
8982 static void
8983 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8984 {
8985         mutex_enter(&vhc->vhc_lock);
8986         vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8987         cv_broadcast(&vhc->vhc_cv);
8988         mutex_exit(&vhc->vhc_lock);
8989 }
8990 
/*
 * Entry of a built-in phci driver table: the driver name and whether the
 * driver supports the root device.
 */
typedef struct mdi_phci_driver_info {
        char    *phdriver_name; /* name of the phci driver */

        /* set to non zero if the phci driver supports root device */
        int     phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
        { "fp", 1 },
        { "iscsi", 0 },
        { "ibsrp", 1 }
};

/* each element is braced explicitly, as in the scsi table above */
static mdi_phci_driver_info_t ib_phci_driver_list[] = {
        { "tavor", 1 }
};
9013 
9014 static void *
9015 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9016 {
9017         void *new_ptr;
9018 
9019         new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9020         if (old_ptr) {
9021                 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9022                 kmem_free(old_ptr, old_size);
9023         }
9024         return (new_ptr);
9025 }
9026 
9027 static void
9028 add_to_phci_list(char ***driver_list, int **root_support_list,
9029     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9030 {
9031         ASSERT(*cur_elements <= *max_elements);
9032         if (*cur_elements == *max_elements) {
9033                 *max_elements += 10;
9034                 *driver_list = mdi_realloc(*driver_list,
9035                     sizeof (char *) * (*cur_elements),
9036                     sizeof (char *) * (*max_elements));
9037                 *root_support_list = mdi_realloc(*root_support_list,
9038                     sizeof (int) * (*cur_elements),
9039                     sizeof (int) * (*max_elements));
9040         }
9041         (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9042         (*root_support_list)[*cur_elements] = root_support;
9043         (*cur_elements)++;
9044 }
9045 
9046 static void
9047 get_phci_driver_list(char *vhci_class, char ***driver_list,
9048     int **root_support_list, int *cur_elements, int *max_elements)
9049 {
9050         mdi_phci_driver_info_t  *st_driver_list, *p;
9051         int             st_ndrivers, root_support, i, j, driver_conf_count;
9052         major_t         m;
9053         struct devnames *dnp;
9054         ddi_prop_t      *propp;
9055 
9056         *driver_list = NULL;
9057         *root_support_list = NULL;
9058         *cur_elements = 0;
9059         *max_elements = 0;
9060 
9061         /* add the phci drivers derived from the phci driver.conf files */
9062         for (m = 0; m < devcnt; m++) {
9063                 dnp = &devnamesp[m];
9064 
9065                 if (dnp->dn_flags & DN_PHCI_DRIVER) {
9066                         LOCK_DEV_OPS(&dnp->dn_lock);
9067                         if (dnp->dn_global_prop_ptr != NULL &&
9068                             (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9069                             DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9070                             &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9071                             strcmp(propp->prop_val, vhci_class) == 0) {
9072 
9073                                 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9074                                     DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9075                                     &dnp->dn_global_prop_ptr->prop_list)
9076                                     == NULL) ? 1 : 0;
9077 
9078                                 add_to_phci_list(driver_list, root_support_list,
9079                                     cur_elements, max_elements, dnp->dn_name,
9080                                     root_support);
9081 
9082                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9083                         } else
9084                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9085                 }
9086         }
9087 
9088         driver_conf_count = *cur_elements;
9089 
9090         /* add the phci drivers specified in the built-in tables */
9091         if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9092                 st_driver_list = scsi_phci_driver_list;
9093                 st_ndrivers = sizeof (scsi_phci_driver_list) /
9094                     sizeof (mdi_phci_driver_info_t);
9095         } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9096                 st_driver_list = ib_phci_driver_list;
9097                 st_ndrivers = sizeof (ib_phci_driver_list) /
9098                     sizeof (mdi_phci_driver_info_t);
9099         } else {
9100                 st_driver_list = NULL;
9101                 st_ndrivers = 0;
9102         }
9103 
9104         for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9105                 /* add this phci driver if not already added before */
9106                 for (j = 0; j < driver_conf_count; j++) {
9107                         if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9108                                 break;
9109                 }
9110                 if (j == driver_conf_count) {
9111                         add_to_phci_list(driver_list, root_support_list,
9112                             cur_elements, max_elements, p->phdriver_name,
9113                             p->phdriver_root_support);
9114                 }
9115         }
9116 }
9117 
9118 /*
9119  * Attach the phci driver instances associated with the specified vhci class.
9120  * If root is mounted attach all phci driver instances.
9121  * If root is not mounted, attach the instances of only those phci
9122  * drivers that have the root support.
9123  */
9124 static void
9125 attach_phci_drivers(char *vhci_class)
9126 {
9127         char    **driver_list, **p;
9128         int     *root_support_list;
9129         int     cur_elements, max_elements, i;
9130         major_t m;
9131 
9132         get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9133             &cur_elements, &max_elements);
9134 
9135         for (i = 0; i < cur_elements; i++) {
9136                 if (modrootloaded || root_support_list[i]) {
9137                         m = ddi_name_to_major(driver_list[i]);
9138                         if (m != DDI_MAJOR_T_NONE &&
9139                             ddi_hold_installed_driver(m))
9140                                 ddi_rele_driver(m);
9141                 }
9142         }
9143 
9144         if (driver_list) {
9145                 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9146                         kmem_free(*p, strlen(*p) + 1);
9147                 kmem_free(driver_list, sizeof (char *) * max_elements);
9148                 kmem_free(root_support_list, sizeof (int) * max_elements);
9149         }
9150 }
9151 
9152 /*
9153  * Build vhci cache:
9154  *
9155  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9156  * the phci driver instances. During this process the cache gets built.
9157  *
9158  * Cache is built fully if the root is mounted.
9159  * If the root is not mounted, phci drivers that do not have root support
9160  * are not attached. As a result the cache is built partially. The entries
9161  * in the cache reflect only those phci drivers that have root support.
9162  */
9163 static int
9164 build_vhci_cache(mdi_vhci_t *vh)
9165 {
9166         mdi_vhci_config_t *vhc = vh->vh_config;
9167         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9168 
9169         single_threaded_vhconfig_enter(vhc);
9170 
9171         rw_enter(&vhcache->vhcache_lock, RW_READER);
9172         if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9173                 rw_exit(&vhcache->vhcache_lock);
9174                 single_threaded_vhconfig_exit(vhc);
9175                 return (0);
9176         }
9177         rw_exit(&vhcache->vhcache_lock);
9178 
9179         attach_phci_drivers(vh->vh_class);
9180         bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9181             BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9182 
9183         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9184         vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9185         rw_exit(&vhcache->vhcache_lock);
9186 
9187         single_threaded_vhconfig_exit(vhc);
9188         vhcache_dirty(vhc);
9189         return (1);
9190 }
9191 
9192 /*
9193  * Determine if discovery of paths is needed.
9194  */
9195 static int
9196 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9197 {
9198         int rv = 1;
9199 
9200         mutex_enter(&vhc->vhc_lock);
9201         if (i_ddi_io_initialized() == 0) {
9202                 if (vhc->vhc_path_discovery_boot > 0) {
9203                         vhc->vhc_path_discovery_boot--;
9204                         goto out;
9205                 }
9206         } else {
9207                 if (vhc->vhc_path_discovery_postboot > 0) {
9208                         vhc->vhc_path_discovery_postboot--;
9209                         goto out;
9210                 }
9211         }
9212 
9213         /*
9214          * Do full path discovery at most once per mdi_path_discovery_interval.
9215          * This is to avoid a series of full path discoveries when opening
9216          * stale /dev/[r]dsk links.
9217          */
9218         if (mdi_path_discovery_interval != -1 &&
9219             ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9220                 goto out;
9221 
9222         rv = 0;
9223 out:
9224         mutex_exit(&vhc->vhc_lock);
9225         return (rv);
9226 }
9227 
9228 /*
9229  * Discover all paths:
9230  *
9231  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9232  * driver instances. During this process all paths will be discovered.
9233  */
9234 static int
9235 vhcache_discover_paths(mdi_vhci_t *vh)
9236 {
9237         mdi_vhci_config_t *vhc = vh->vh_config;
9238         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9239         int rv = 0;
9240 
9241         single_threaded_vhconfig_enter(vhc);
9242 
9243         if (vhcache_do_discovery(vhc)) {
9244                 attach_phci_drivers(vh->vh_class);
9245                 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9246                     NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9247 
9248                 mutex_enter(&vhc->vhc_lock);
9249                 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9250                     mdi_path_discovery_interval * TICKS_PER_SECOND;
9251                 mutex_exit(&vhc->vhc_lock);
9252                 rv = 1;
9253         }
9254 
9255         single_threaded_vhconfig_exit(vhc);
9256         return (rv);
9257 }
9258 
/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *      vdip    vhci dip
 *      flags   bus config flags
 *      op      bus config operation
 *      The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *      arg     pointer to name@addr
 *      child   upon successful return from this function, *child will be
 *              set to the configured and held devinfo child node of vdip.
 *      ct_addr pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *      arg     major number of the driver
 *      child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *      arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 *
 * Return Values:
 *      MDI_SUCCESS     ndi_busop_bus_config() succeeded, either on the
 *                      first attempt or (BUS_CONFIG_ONE only) on the retry
 *                      that follows a full path discovery
 *      MDI_FAILURE     otherwise
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
        mdi_vhci_t *vh = i_devi_get_vhci(vdip);
        mdi_vhci_config_t *vhc = vh->vh_config;
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        /*
         * rv holds the result of build_vhci_cache(): non-zero means the
         * cache was built by this very call, zero means it was already
         * set up (or that the build step was not reached).
         */
        int rv = 0;
        /* set once a BUS_CONFIG_ONE request carries a usable name@addr */
        int params_valid = 0;
        char *cp;

        /*
         * To bus config vhcis we relay operation, possibly using another
         * thread, to phcis. The phci driver then interacts with MDI to cause
         * vhci child nodes to be enumerated under the vhci node.  Adding a
         * vhci child requires an ndi_devi_enter of the vhci. Since another
         * thread may be adding the child, to avoid deadlock we can't wait
         * for the relayed operations to complete if we have already entered
         * the vhci node.
         */
        if (DEVI_BUSY_OWNED(vdip)) {
                MDI_DEBUG(2, (MDI_NOTE, vdip,
                    "vhci dip is busy owned %p", (void *)vdip));
                goto default_bus_config;
        }

        /*
         * Build the cache first if that has not happened yet.  The reader
         * lock is dropped around build_vhci_cache() and re-taken afterwards;
         * every arm of the switch below is responsible for releasing it.
         */
        rw_enter(&vhcache->vhcache_lock, RW_READER);
        if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
                rw_exit(&vhcache->vhcache_lock);
                rv = build_vhci_cache(vh);
                rw_enter(&vhcache->vhcache_lock, RW_READER);
        }

        switch (op) {
        case BUS_CONFIG_ONE:
                if (arg != NULL && ct_addr != NULL) {
                        /* extract node name */
                        cp = (char *)arg;
                        while (*cp != '\0' && *cp != '@')
                                cp++;
                        if (*cp == '@') {
                                params_valid = 1;
                                /*
                                 * Temporarily terminate arg at the '@' so
                                 * that only the node name is passed down;
                                 * the '@' is put back right after the call.
                                 */
                                *cp = '\0';
                                config_client_paths(vhc, (char *)arg, ct_addr);
                                /* config_client_paths() releases cache_lock */
                                *cp = '@';
                                break;
                        }
                }

                /* arg was not of the form name@addr: just drop the lock */
                rw_exit(&vhcache->vhcache_lock);
                break;

        case BUS_CONFIG_DRIVER:
                rw_exit(&vhcache->vhcache_lock);
                /*
                 * Skip when the cache was just built above: that build
                 * already drove BUS_CONFIG_ALL on the phcis.
                 */
                if (rv == 0)
                        st_bus_config_all_phcis(vhc, flags, op,
                            (major_t)(uintptr_t)arg);
                break;

        case BUS_CONFIG_ALL:
                rw_exit(&vhcache->vhcache_lock);
                /*
                 * NOTE(review): -1 is presumably the same value as
                 * DDI_MAJOR_T_NONE used elsewhere in this file - confirm.
                 */
                if (rv == 0)
                        st_bus_config_all_phcis(vhc, flags, op, -1);
                break;

        default:
                rw_exit(&vhcache->vhcache_lock);
                break;
        }


default_bus_config:
        /*
         * All requested child nodes are enumerated under the vhci.
         * Now configure them.
         */
        if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
            NDI_SUCCESS) {
                return (MDI_SUCCESS);
        } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
                /*
                 * The retry is only attempted when the cache was not built
                 * by this call (rv == 0) and the request named a valid
                 * name@addr.
                 */
                /* discover all paths and try configuring again */
                if (vhcache_discover_paths(vh) &&
                    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
                    NDI_SUCCESS)
                        return (MDI_SUCCESS);
        }

        return (MDI_FAILURE);
}
9374 
9375 /*
9376  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9377  */
9378 static nvlist_t *
9379 read_on_disk_vhci_cache(char *vhci_class)
9380 {
9381         nvlist_t *nvl;
9382         int err;
9383         char *filename;
9384 
9385         filename = vhclass2vhcache_filename(vhci_class);
9386 
9387         if ((err = fread_nvlist(filename, &nvl)) == 0) {
9388                 kmem_free(filename, strlen(filename) + 1);
9389                 return (nvl);
9390         } else if (err == EIO)
9391                 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9392         else if (err == EINVAL)
9393                 cmn_err(CE_WARN,
9394                     "%s: data file corrupted, will recreate", filename);
9395 
9396         kmem_free(filename, strlen(filename) + 1);
9397         return (NULL);
9398 }
9399 
9400 /*
9401  * Read on-disk vhci cache into nvlists for all vhci classes.
9402  * Called during booting by i_ddi_read_devices_files().
9403  */
9404 void
9405 mdi_read_devices_files(void)
9406 {
9407         int i;
9408 
9409         for (i = 0; i < N_VHCI_CLASSES; i++)
9410                 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9411 }
9412 
/*
 * Remove all stale entries from vhci cache.
 *
 * Under the cache writer lock, the client list, each client's path list and
 * the phci list are detached and then rebuilt from the entries that are
 * still backed by live objects:
 *   - a path is kept when both its cached phci has a cphci_phci and the
 *     path itself has a cpi_pip;
 *   - a client is kept when at least one of its paths was kept;
 *   - a phci is kept when its cphci_phci is set.
 * Afterwards the clean time is recorded and vhcache_dirty() is called.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
        mdi_vhci_cache_t        *vhcache = &vhc->vhc_vhcache;
        mdi_vhcache_phci_t      *phci, *nxt_phci;
        mdi_vhcache_client_t    *client, *nxt_client;
        mdi_vhcache_pathinfo_t  *path, *nxt_path;

        rw_enter(&vhcache->vhcache_lock, RW_WRITER);

        /* detach the whole client list; survivors are re-enqueued below */
        client = vhcache->vhcache_client_head;
        vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
        for ( ; client != NULL; client = nxt_client) {
                /* grab the link first: client may be freed in this pass */
                nxt_client = client->cct_next;

                /* same detach-and-rebuild treatment for this client's paths */
                path = client->cct_cpi_head;
                client->cct_cpi_head = client->cct_cpi_tail = NULL;
                for ( ; path != NULL; path = nxt_path) {
                        nxt_path = path->cpi_next;
                        if ((path->cpi_cphci->cphci_phci != NULL) &&
                            (path->cpi_pip != NULL)) {
                                enqueue_tail_vhcache_pathinfo(client, path);
                        } else if (path->cpi_pip != NULL) {
                                /* Not valid to have a path without a phci. */
                                free_vhcache_pathinfo(path);
                        }
                        /*
                         * NOTE(review): a path whose cpi_pip is NULL is
                         * neither re-enqueued nor passed to
                         * free_vhcache_pathinfo() here - confirm whether it
                         * is released elsewhere or leaked.
                         */
                }

                if (client->cct_cpi_head != NULL)
                        enqueue_vhcache_client(vhcache, client);
                else {
                        /* no paths left: drop the client from hash and free */
                        (void) mod_hash_destroy(vhcache->vhcache_client_hash,
                            (mod_hash_key_t)client->cct_name_addr);
                        free_vhcache_client(client);
                }
        }

        /* rebuild the phci list, keeping only entries with a cphci_phci */
        phci = vhcache->vhcache_phci_head;
        vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
        for ( ; phci != NULL; phci = nxt_phci) {

                nxt_phci = phci->cphci_next;
                if (phci->cphci_phci != NULL)
                        enqueue_vhcache_phci(vhcache, phci);
                else
                        free_vhcache_phci(phci);
        }

        vhcache->vhcache_clean_time = ddi_get_lbolt64();
        rw_exit(&vhcache->vhcache_lock);
        /* called only after the cache lock has been dropped */
        vhcache_dirty(vhc);
}
9468 
9469 /*
9470  * Remove all stale entries from vhci cache.
9471  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9472  */
9473 void
9474 mdi_clean_vhcache(void)
9475 {
9476         mdi_vhci_t *vh;
9477 
9478         mutex_enter(&mdi_mutex);
9479         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9480                 vh->vh_refcnt++;
9481                 mutex_exit(&mdi_mutex);
9482                 clean_vhcache(vh->vh_config);
9483                 mutex_enter(&mdi_mutex);
9484                 vh->vh_refcnt--;
9485         }
9486         mutex_exit(&mdi_mutex);
9487 }
9488 
9489 /*
9490  * mdi_vhci_walk_clients():
9491  *              Walker routine to traverse client dev_info nodes
9492  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9493  * below the client, including nexus devices, which we dont want.
9494  * So we just traverse the immediate siblings, starting from 1st client.
9495  */
9496 void
9497 mdi_vhci_walk_clients(dev_info_t *vdip,
9498     int (*f)(dev_info_t *, void *), void *arg)
9499 {
9500         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9501         dev_info_t      *cdip;
9502         mdi_client_t    *ct;
9503 
9504         MDI_VHCI_CLIENT_LOCK(vh);
9505         cdip = ddi_get_child(vdip);
9506         while (cdip) {
9507                 ct = i_devi_get_client(cdip);
9508                 MDI_CLIENT_LOCK(ct);
9509 
9510                 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9511                         cdip = ddi_get_next_sibling(cdip);
9512                 else
9513                         cdip = NULL;
9514 
9515                 MDI_CLIENT_UNLOCK(ct);
9516         }
9517         MDI_VHCI_CLIENT_UNLOCK(vh);
9518 }
9519 
9520 /*
9521  * mdi_vhci_walk_phcis():
9522  *              Walker routine to traverse phci dev_info nodes
9523  */
9524 void
9525 mdi_vhci_walk_phcis(dev_info_t *vdip,
9526     int (*f)(dev_info_t *, void *), void *arg)
9527 {
9528         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9529         mdi_phci_t      *ph, *next;
9530 
9531         MDI_VHCI_PHCI_LOCK(vh);
9532         ph = vh->vh_phci_head;
9533         while (ph) {
9534                 MDI_PHCI_LOCK(ph);
9535 
9536                 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9537                         next = ph->ph_next;
9538                 else
9539                         next = NULL;
9540 
9541                 MDI_PHCI_UNLOCK(ph);
9542                 ph = next;
9543         }
9544         MDI_VHCI_PHCI_UNLOCK(vh);
9545 }
9546 
9547 
9548 /*
9549  * mdi_walk_vhcis():
9550  *              Walker routine to traverse vhci dev_info nodes
9551  */
9552 void
9553 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9554 {
9555         mdi_vhci_t      *vh = NULL;
9556 
9557         mutex_enter(&mdi_mutex);
9558         /*
9559          * Scan for already registered vhci
9560          */
9561         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9562                 vh->vh_refcnt++;
9563                 mutex_exit(&mdi_mutex);
9564                 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9565                         mutex_enter(&mdi_mutex);
9566                         vh->vh_refcnt--;
9567                         break;
9568                 } else {
9569                         mutex_enter(&mdi_mutex);
9570                         vh->vh_refcnt--;
9571                 }
9572         }
9573 
9574         mutex_exit(&mdi_mutex);
9575 }
9576 
9577 /*
9578  * i_mdi_log_sysevent():
9579  *              Logs events for pickup by syseventd
9580  */
9581 static void
9582 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9583 {
9584         char            *path_name;
9585         nvlist_t        *attr_list;
9586 
9587         if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9588             KM_SLEEP) != DDI_SUCCESS) {
9589                 goto alloc_failed;
9590         }
9591 
9592         path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9593         (void) ddi_pathname(dip, path_name);
9594 
9595         if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9596             ddi_driver_name(dip)) != DDI_SUCCESS) {
9597                 goto error;
9598         }
9599 
9600         if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9601             (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9602                 goto error;
9603         }
9604 
9605         if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9606             (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9607                 goto error;
9608         }
9609 
9610         if (nvlist_add_string(attr_list, DDI_PATHNAME,
9611             path_name) != DDI_SUCCESS) {
9612                 goto error;
9613         }
9614 
9615         if (nvlist_add_string(attr_list, DDI_CLASS,
9616             ph_vh_class) != DDI_SUCCESS) {
9617                 goto error;
9618         }
9619 
9620         (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9621             attr_list, NULL, DDI_SLEEP);
9622 
9623 error:
9624         kmem_free(path_name, MAXPATHLEN);
9625         nvlist_free(attr_list);
9626         return;
9627 
9628 alloc_failed:
9629         MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9630 }
9631 
/*
 * mdi_get_phci_driver_list():
 *              Return the names of the phci drivers of a vhci class.
 *
 * *ndrivers is set to the number of names.  The returned array is resized
 * to hold exactly that many entries; NULL is returned when there is no
 * list.  The result is meant to be released with
 * mdi_free_phci_driver_list().
 */
char **
mdi_get_phci_driver_list(char *vhci_class, int  *ndrivers)
{
        char    **names;
        int     *root_support;
        int     nused, nalloc;

        get_phci_driver_list(vhci_class, &names, &root_support,
            &nused, &nalloc);

        *ndrivers = nused;

        if (names == NULL)
                return (NULL);

        /* callers only get the names; the root support flags are dropped */
        kmem_free(root_support, sizeof (int) * nalloc);

        /* shrink the array from its allocated size to the used size */
        return (mdi_realloc(names, sizeof (char *) * nalloc,
            sizeof (char *) * nused));
}
9653 
/*
 * mdi_free_phci_driver_list():
 *              Free a driver name array of ndrivers entries together with
 *              the name strings it points to.  A NULL list is ignored.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
        int     idx;

        if (driver_list == NULL)
                return;

        /* each name is its own allocation, sized by its string length */
        for (idx = 0; idx < ndrivers; idx++)
                kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);

        kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9666 
9667 /*
9668  * mdi_is_dev_supported():
9669  *              function called by pHCI bus config operation to determine if a
9670  *              device should be represented as a child of the vHCI or the
9671  *              pHCI.  This decision is made by the vHCI, using cinfo idenity
9672  *              information passed by the pHCI - specifics of the cinfo
9673  *              representation are by agreement between the pHCI and vHCI.
9674  * Return Values:
9675  *              MDI_SUCCESS
9676  *              MDI_FAILURE
9677  */
9678 int
9679 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9680 {
9681         mdi_vhci_t      *vh;
9682 
9683         ASSERT(class && pdip);
9684 
9685         /*
9686          * For dev_supported, mdi_phci_register() must have established pdip as
9687          * a pHCI.
9688          *
9689          * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9690          * MDI_PHCI(pdip) will return false if mpxio is disabled.
9691          */
9692         if (!MDI_PHCI(pdip))
9693                 return (MDI_FAILURE);
9694 
9695         /* Return MDI_FAILURE if vHCI does not support asking the question. */
9696         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9697         if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9698                 return (MDI_FAILURE);
9699         }
9700 
9701         /* Return vHCI answer */
9702         return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9703 }
9704 
9705 int
9706 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9707 {
9708         uint_t devstate = 0;
9709         dev_info_t *cdip;
9710 
9711         if ((pip == NULL) || (dcp == NULL))
9712                 return (MDI_FAILURE);
9713 
9714         cdip = mdi_pi_get_client(pip);
9715 
9716         switch (mdi_pi_get_state(pip)) {
9717         case MDI_PATHINFO_STATE_INIT:
9718                 devstate = DEVICE_DOWN;
9719                 break;
9720         case MDI_PATHINFO_STATE_ONLINE:
9721                 devstate = DEVICE_ONLINE;
9722                 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9723                         devstate |= DEVICE_BUSY;
9724                 break;
9725         case MDI_PATHINFO_STATE_STANDBY:
9726                 devstate = DEVICE_ONLINE;
9727                 break;
9728         case MDI_PATHINFO_STATE_FAULT:
9729                 devstate = DEVICE_DOWN;
9730                 break;
9731         case MDI_PATHINFO_STATE_OFFLINE:
9732                 devstate = DEVICE_OFFLINE;
9733                 break;
9734         default:
9735                 ASSERT(MDI_PI(pip)->pi_state);
9736         }
9737 
9738         if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9739                 return (MDI_FAILURE);
9740 
9741         return (MDI_SUCCESS);
9742 }