Print this page
    
NEX-17446 cleanup of hot unplugged disks fails intermittently
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
NEX-17944 HBA drivers don't need the redundant devfs_clean step
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
NEX-17934 NULL pointer reference in kstat_rele()
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
NEX-15288 getting kstat_create('mdi'...) errors on each reboot
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-15925 pseudonex, rootnex, and friends don't need to log useless device announcements
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
OS-253 we should not free mdi_pathinfo_t in mptsas when device(s) are retired
OS-126 Creating a LUN for retired device results in sysevent loop
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/os/sunmdi.c
          +++ new/usr/src/uts/common/os/sunmdi.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  
    | 
      ↓ open down ↓ | 
    10 lines elided | 
    
      ↑ open up ↑ | 
  
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
       21 +
  21   22  /*
  22   23   * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
       24 + * Copyright 2018 Nexenta Systems, Inc.
  24   25   */
  25   26  
  26   27  /*
  27   28   * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
  28   29   * more detailed discussion of the overall mpxio architecture.
  29      - *
  30      - * Default locking order:
  31      - *
  32      - * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex);
  33      - * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex);
  34      - * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex);
  35      - * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex);
  36      - * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
  37      - * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
  38      - * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
  39   30   */
  40   31  
  41   32  #include <sys/note.h>
  42   33  #include <sys/types.h>
  43   34  #include <sys/varargs.h>
  44   35  #include <sys/param.h>
  45   36  #include <sys/errno.h>
  46   37  #include <sys/uio.h>
  47   38  #include <sys/buf.h>
  48   39  #include <sys/modctl.h>
  49   40  #include <sys/open.h>
  50   41  #include <sys/kmem.h>
  51   42  #include <sys/poll.h>
  52   43  #include <sys/conf.h>
  53   44  #include <sys/bootconf.h>
  54   45  #include <sys/cmn_err.h>
  55   46  #include <sys/stat.h>
  56   47  #include <sys/ddi.h>
  57   48  #include <sys/sunddi.h>
  58   49  #include <sys/ddipropdefs.h>
  59   50  #include <sys/sunndi.h>
  60   51  #include <sys/ndi_impldefs.h>
  61   52  #include <sys/promif.h>
  62   53  #include <sys/sunmdi.h>
  63   54  #include <sys/mdi_impldefs.h>
  64   55  #include <sys/taskq.h>
  65   56  #include <sys/epm.h>
  66   57  #include <sys/sunpm.h>
  67   58  #include <sys/modhash.h>
  68   59  #include <sys/disp.h>
  69   60  #include <sys/autoconf.h>
  70   61  #include <sys/sysmacros.h>
  71   62  
  72   63  #ifdef  DEBUG
  73   64  #include <sys/debug.h>
  74   65  int     mdi_debug = 1;  /* threshold compared against MDI_DEBUG's dbglevel */
  75   66  int     mdi_debug_logonly = 0;  /* presumably read by i_mdi_log() - confirm */
           /*
            * MDI_DEBUG(dbglevel, (args)):
            *      In DEBUG builds, call i_mdi_log with the parenthesized argument
            *      list when mdi_debug >= dbglevel.  In non-DEBUG builds the macro
            *      expands to nothing.
            *      NOTE(review): the DEBUG form is a bare `if' statement, so using it
            *      as the body of an unbraced if/else would capture the `else'.
            *
            * MDI_WARN, MDI_NOTE, MDI_CONT:
            *      Each expands to two arguments, a CE_* level and __func__, which
            *      line up with the first two parameters of i_mdi_log().
            */
  76   67  #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs
  77   68  #define MDI_WARN        CE_WARN, __func__
  78   69  #define MDI_NOTE        CE_NOTE, __func__
  79   70  #define MDI_CONT        CE_CONT, __func__
  80   71  static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
  81   72  #else   /* !DEBUG */
  82   73  #define MDI_DEBUG(dbglevel, pargs)
  83   74  #endif  /* DEBUG */
           /*
            * The two tunables below are defined in both DEBUG and non-DEBUG builds.
            * NOTE(review): their consumers are outside this part of the file.
            */
  84   75  int     mdi_debug_consoleonly = 0;
  85   76  int     mdi_delay = 3;
  86   77  
  87   78  extern pri_t    minclsyspri;
  88   79  extern int      modrootloaded;
  89   80  
  90   81  /*
  91   82   * Global mutex:
  92   83   * Protects vHCI list and structure members.
  93   84   */
  94   85  kmutex_t        mdi_mutex;
  95   86  
  96   87  /*
  97   88   * Registered vHCI class driver lists
  98   89   */
  99   90  int             mdi_vhci_count;
 100   91  mdi_vhci_t      *mdi_vhci_head;
 101   92  mdi_vhci_t      *mdi_vhci_tail;
 102   93  
 103   94  /*
 104   95   * Client Hash Table size
 105   96   */
 106   97  static int      mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
 107   98  
 108   99  /*
 109  100   * taskq interface definitions
 110  101   */
 111  102  #define MDI_TASKQ_N_THREADS     8
 112  103  #define MDI_TASKQ_PRI           minclsyspri
 113  104  #define MDI_TASKQ_MINALLOC      (4*mdi_taskq_n_threads)
 114  105  #define MDI_TASKQ_MAXALLOC      (500*mdi_taskq_n_threads)
 115  106  
 116  107  taskq_t                         *mdi_taskq;
 117  108  static uint_t                   mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
 118  109  
 119  110  #define TICKS_PER_SECOND        (drv_usectohz(1000000))
 120  111  
 121  112  /*
 122  113   * The data should be "quiet" for this interval (in seconds) before the
 123  114   * vhci cached data is flushed to the disk.
 124  115   */
 125  116  static int mdi_vhcache_flush_delay = 10;
 126  117  
 127  118  /* number of seconds the vhcache flush daemon will sleep idle before exiting */
 128  119  static int mdi_vhcache_flush_daemon_idle_time = 60;
 129  120  
 130  121  /*
 131  122   * MDI falls back to discovery of all paths when a bus_config_one fails.
 132  123   * The following parameters can be used to tune this operation.
 133  124   *
 134  125   * mdi_path_discovery_boot
 135  126   *      Number of times path discovery will be attempted during early boot.
 136  127   *      Probably there is no reason to ever set this value to greater than one.
 137  128   *
 138  129   * mdi_path_discovery_postboot
 139  130   *      Number of times path discovery will be attempted after early boot.
 140  131   *      Set it to a minimum of two to allow for discovery of iscsi paths which
 141  132   *      may happen very late during booting.
 142  133   *
 143  134   * mdi_path_discovery_interval
 144  135   *      Minimum number of seconds MDI will wait between successive discovery
 145  136   *      of all paths. Set it to -1 to disable discovery of all paths.
 146  137   */
 147  138  static int mdi_path_discovery_boot = 1;
 148  139  static int mdi_path_discovery_postboot = 2;
 149  140  static int mdi_path_discovery_interval = 10;
 150  141  
 151  142  /*
 152  143   * number of seconds the asynchronous configuration thread will sleep idle
 153  144   * before exiting.
 154  145   */
 155  146  static int mdi_async_config_idle_time = 600;
 156  147  
 157  148  static int mdi_bus_config_cache_hash_size = 256;
 158  149  
 159  150  /* turns off multithreaded configuration for certain operations */
 160  151  static int mdi_mtc_off = 0;
 161  152  
 162  153  /*
 163  154   * The "path" to a pathinfo node is identical to the /devices path the
 164  155   * devinfo node would have had if the device had been enumerated under a
 165  156   * pHCI instead of a vHCI.  This "path" is associated with a 'path_instance'.
 166  157   * This association persists across create/delete of the pathinfo nodes,
 167  158   * but not across reboot.
 168  159   */
 169  160  static uint_t           mdi_pathmap_instance = 1;       /* 0 -> any path */
 170  161  static int              mdi_pathmap_hash_size = 256;
 171  162  static kmutex_t         mdi_pathmap_mutex;
 172  163  static mod_hash_t       *mdi_pathmap_bypath;            /* "path"->instance */
 173  164  static mod_hash_t       *mdi_pathmap_byinstance;        /* instance->"path" */
 174  165  static mod_hash_t       *mdi_pathmap_sbyinstance;       /* inst->shortpath */
 175  166  
 176  167  /*
 177  168   * MDI component property name/value string definitions
 178  169   */
 179  170  const char              *mdi_component_prop = "mpxio-component";
 180  171  const char              *mdi_component_prop_vhci = "vhci";
 181  172  const char              *mdi_component_prop_phci = "phci";
 182  173  const char              *mdi_component_prop_client = "client";
 183  174  
 184  175  /*
 185  176   * MDI client global unique identifier property name
 186  177   */
 187  178  const char              *mdi_client_guid_prop = "client-guid";
 188  179  
 189  180  /*
 190  181   * MDI client load balancing property name/value string definitions
 191  182   */
 192  183  const char              *mdi_load_balance = "load-balance";
 193  184  const char              *mdi_load_balance_none = "none";
 194  185  const char              *mdi_load_balance_rr = "round-robin";
 195  186  const char              *mdi_load_balance_lba = "logical-block";
 196  187  
 197  188  /*
 198  189   * Obsolete vHCI class definition; to be removed after Leadville update
 199  190   */
 200  191  const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
 201  192  
 202  193  static char vhci_greeting[] =
 203  194          "\tThere already exists one vHCI driver for class %s\n"
 204  195          "\tOnly one vHCI driver for each class is allowed\n";
 205  196  
 206  197  /*
 207  198   * Static function prototypes
 208  199   */
 209  200  static int              i_mdi_phci_offline(dev_info_t *, uint_t);
 210  201  static int              i_mdi_client_offline(dev_info_t *, uint_t);
 211  202  static int              i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
 212  203  static void             i_mdi_phci_post_detach(dev_info_t *,
 213  204                              ddi_detach_cmd_t, int);
 214  205  static int              i_mdi_client_pre_detach(dev_info_t *,
 215  206                              ddi_detach_cmd_t);
 216  207  static void             i_mdi_client_post_detach(dev_info_t *,
 217  208                              ddi_detach_cmd_t, int);
 218  209  static void             i_mdi_pm_hold_pip(mdi_pathinfo_t *);
 219  210  static void             i_mdi_pm_rele_pip(mdi_pathinfo_t *);
 220  211  static int              i_mdi_lba_lb(mdi_client_t *ct,
 221  212                              mdi_pathinfo_t **ret_pip, struct buf *buf);
 222  213  static void             i_mdi_pm_hold_client(mdi_client_t *, int);
 223  214  static void             i_mdi_pm_rele_client(mdi_client_t *, int);
 224  215  static void             i_mdi_pm_reset_client(mdi_client_t *);
 225  216  static int              i_mdi_power_all_phci(mdi_client_t *);
 226  217  static void             i_mdi_log_sysevent(dev_info_t *, char *, char *);
 227  218  
 228  219  
 229  220  /*
 230  221   * Internal mdi_pathinfo node functions
 231  222   */
 232  223  static void             i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
 233  224  
 234  225  static mdi_vhci_t       *i_mdi_vhci_class2vhci(char *);
 235  226  static mdi_vhci_t       *i_devi_get_vhci(dev_info_t *);
 236  227  static mdi_phci_t       *i_devi_get_phci(dev_info_t *);
 237  228  static void             i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
 238  229  static void             i_mdi_phci_unlock(mdi_phci_t *);
 239  230  static mdi_pathinfo_t   *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
 240  231  static void             i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
 241  232  static void             i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
 242  233  static void             i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
 243  234                              mdi_client_t *);
 244  235  static void             i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
 245  236  static void             i_mdi_client_remove_path(mdi_client_t *,
 246  237                              mdi_pathinfo_t *);
 247  238  
 248  239  static int              i_mdi_pi_state_change(mdi_pathinfo_t *,
 249  240                              mdi_pathinfo_state_t, int);
 250  241  static int              i_mdi_pi_offline(mdi_pathinfo_t *, int);
 251  242  static dev_info_t       *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
 252  243                              char **, int);
 253  244  static dev_info_t       *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
 254  245  static int              i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
 255  246  static int              i_mdi_is_child_present(dev_info_t *, dev_info_t *);
 256  247  static mdi_client_t     *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
 257  248  static void             i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
 258  249  static void             i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
 259  250  static mdi_client_t     *i_mdi_client_find(mdi_vhci_t *, char *, char *);
 260  251  static void             i_mdi_client_update_state(mdi_client_t *);
 261  252  static int              i_mdi_client_compute_state(mdi_client_t *,
 262  253                              mdi_phci_t *);
 263  254  static void             i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
 264  255  static void             i_mdi_client_unlock(mdi_client_t *);
 265  256  static int              i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
 266  257  static mdi_client_t     *i_devi_get_client(dev_info_t *);
 267  258  /*
 268  259   * NOTE: this will be removed once the NWS files are changed to use the new
 269  260   * mdi_{enable,disable}_path interfaces
 270  261   */
 271  262  static int              i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
 272  263                                  int, int);
 273  264  static mdi_pathinfo_t   *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
 274  265                                  mdi_vhci_t *vh, int flags, int op);
 275  266  /*
 276  267   * Failover related function prototypes
 277  268   */
 278  269  static int              i_mdi_failover(void *);
 279  270  
 280  271  /*
 281  272   * misc internal functions
 282  273   */
 283  274  static int              i_mdi_get_hash_key(char *);
 284  275  static int              i_map_nvlist_error_to_mdi(int);
 285  276  static void             i_mdi_report_path_state(mdi_client_t *,
 286  277                              mdi_pathinfo_t *);
 287  278  
 288  279  static void             setup_vhci_cache(mdi_vhci_t *);
 289  280  static int              destroy_vhci_cache(mdi_vhci_t *);
 290  281  static int              stop_vhcache_async_threads(mdi_vhci_config_t *);
 291  282  static boolean_t        stop_vhcache_flush_thread(void *, int);
 292  283  static void             free_string_array(char **, int);
 293  284  static void             free_vhcache_phci(mdi_vhcache_phci_t *);
 294  285  static void             free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
 295  286  static void             free_vhcache_client(mdi_vhcache_client_t *);
 296  287  static int              mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
 297  288  static nvlist_t         *vhcache_to_mainnvl(mdi_vhci_cache_t *);
 298  289  static void             vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
 299  290  static void             vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
 300  291  static void             vhcache_pi_add(mdi_vhci_config_t *,
 301  292                              struct mdi_pathinfo *);
 302  293  static void             vhcache_pi_remove(mdi_vhci_config_t *,
 303  294                              struct mdi_pathinfo *);
 304  295  static void             free_phclient_path_list(mdi_phys_path_t *);
 305  296  static void             sort_vhcache_paths(mdi_vhcache_client_t *);
 306  297  static int              flush_vhcache(mdi_vhci_config_t *, int);
 307  298  static void             vhcache_dirty(mdi_vhci_config_t *);
 308  299  static void             free_async_client_config(mdi_async_client_config_t *);
 309  300  static void             single_threaded_vhconfig_enter(mdi_vhci_config_t *);
 310  301  static void             single_threaded_vhconfig_exit(mdi_vhci_config_t *);
 311  302  static nvlist_t         *read_on_disk_vhci_cache(char *);
 312  303  extern int              fread_nvlist(char *, nvlist_t **);
 313  304  extern int              fwrite_nvlist(char *, nvlist_t *);
 314  305  
 315  306  /*
            * i_mdi_init():
            *              One-time setup of the MDI framework globals.
            *              mdi_vhci_register() calls this on every registration; the
            *              function-local 'initialized' flag turns every call after the
            *              first into a no-op.
            *              Sets up mdi_mutex, the mdi_taskq task queue, mdi_pathmap_mutex
            *              and the three path-instance hash tables.
            *              NOTE(review): 'initialized' is tested and set without any
            *              lock (mdi_mutex itself is created here), so this relies on
            *              callers being serialized - confirm.
            */
 316  307  static void
 317  308  i_mdi_init()
 318  309  {
 319  310          static int initialized = 0;
 320  311  
 321  312          if (initialized)
 322  313                  return;
 323  314          initialized = 1;
 324  315  
 325  316          mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
 326  317  
 327  318          /* Create our taskq resources */
 328  319          mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
 329  320              MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
 330  321              TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
 331  322          ASSERT(mdi_taskq != NULL);      /* taskq_create never fails */
 332  323  
 333  324          /* Allocate ['path_instance' <-> "path"] maps */
 334  325          mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
           /* string-keyed table: "path" -> instance */
 335  326          mdi_pathmap_bypath = mod_hash_create_strhash(
 336  327              "mdi_pathmap_bypath", mdi_pathmap_hash_size,
 337  328              mod_hash_null_valdtor);
           /* id-keyed table: instance -> "path" */
 338  329          mdi_pathmap_byinstance = mod_hash_create_idhash(
 339  330              "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
 340  331              mod_hash_null_valdtor);
           /* id-keyed table: instance -> short path */
 341  332          mdi_pathmap_sbyinstance = mod_hash_create_idhash(
 342  333              "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
 343  334              mod_hash_null_valdtor);
 344  335  }
 345  336  
 346  337  /*
 347  338   * mdi_get_component_type():
 348  339   *              Return mpxio component type
            *              The value is read directly from the devi_mdi_component
            *              field of dip's devinfo node.  Elsewhere in this file that
            *              field is updated by OR-ing in and masking out
            *              MDI_COMPONENT_* bits, so the result is a bit mask that may
            *              carry more than one of the flags listed below.
            *              dip is dereferenced without a NULL check.
 349  340   * Return Values:
 350  341   *              MDI_COMPONENT_NONE
 351  342   *              MDI_COMPONENT_VHCI
 352  343   *              MDI_COMPONENT_PHCI
 353  344   *              MDI_COMPONENT_CLIENT
 354  345   * XXX This doesn't work under multi-level MPxIO and should be
 355  346   *      removed when clients migrate to mdi_component_is_*() interfaces.
 356  347   */
 357  348  int
 358  349  mdi_get_component_type(dev_info_t *dip)
 359  350  {
 360  351          return (DEVI(dip)->devi_mdi_component);
 361  352  }
 362  353  
 363  354  /*
 364  355   * mdi_vhci_register():
 365  356   *              Register a vHCI module with the mpxio framework
 366  357   *              mdi_vhci_register() is called by vHCI drivers to register the
 367  358   *              'class_driver' vHCI driver and its MDI entrypoints with the
 368  359   *              mpxio framework.  The vHCI driver must call this interface as
 369  360   *              part of its attach(9e) handler.
 370  361   *              Competing threads may try to call mdi_vhci_register() as
 371  362   *              the vHCI drivers are loaded and attached as a result of pHCI
 372  363   *              driver instance registration (mdi_phci_register()) with the
 373  364   *              framework.
            * Arguments:
            *              class   vHCI class name; copied into the new mdi_vhci_t
            *              vdip    devinfo node of the registering vHCI driver
            *              vops    vHCI ops vector; vo_revision must not exceed
            *                      MDI_VHCI_OPS_REV
            *              flags   not referenced by this function
 374  365   * Return Values:
 375  366   *              MDI_SUCCESS
 376  367   *              MDI_FAILURE     a vHCI with non-NULL ops is already registered
            *                              for this class
 377  368   */
 378  369  /*ARGSUSED*/
 379  370  int
 380  371  mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
 381  372      int flags)
 382  373  {
 383  374          mdi_vhci_t              *vh = NULL;
 384  375  
 385  376          /* Registrant can't be older */
 386  377          ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
 387  378  
 388  379  #ifdef DEBUG
 389  380          /*
 390  381           * IB nexus driver is loaded only when IB hardware is present.
 391  382           * In order to be able to do this there is a need to drive the loading
 392  383           * and attaching of the IB nexus driver (especially when an IB hardware
 393  384           * is dynamically plugged in) when an IB HCA driver (PHCI)
 394  385           * is being attached. Unfortunately this gets into the limitations
 395  386           * of devfs as there seems to be no clean way to drive configuration
 396  387           * of a subtree from another subtree of a devfs. Hence, do not ASSERT
 397  388           * for IB.
 398  389           */
 399  390          if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
 400  391                  ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 401  392  #endif
 402  393  
           /* No-op after the first call; creates mdi_mutex used just below */
 403  394          i_mdi_init();
 404  395  
 405  396          mutex_enter(&mdi_mutex);
 406  397          /*
 407  398           * Scan for already registered vhci
 408  399           */
 409  400          for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 410  401                  if (strcmp(vh->vh_class, class) == 0) {
 411  402                          /*
 412  403                           * vHCI has already been created.  Check for valid
 413  404                           * vHCI ops registration.  We only support one vHCI
 414  405                           * module per class
                            *
                            * NOTE(review): if an entry with NULL vh_ops is
                            * found, the loop breaks with vh != NULL, the
                            * creation block below is skipped, and vops/vdip
                            * are never stored in vh, yet MDI_SUCCESS is
                            * returned.  mdi_vhci_unregister() unlinks and
                            * frees the entry, so this path may be unreachable
                            * in practice - confirm.
 415  406                           */
 416  407                          if (vh->vh_ops != NULL) {
 417  408                                  mutex_exit(&mdi_mutex);
 418  409                                  cmn_err(CE_NOTE, vhci_greeting, class);
 419  410                                  return (MDI_FAILURE);
 420  411                          }
 421  412                          break;
 422  413                  }
 423  414          }
 424  415  
 425  416          /*
 426  417           * if not yet created, create the vHCI component
 427  418           */
 428  419          if (vh == NULL) {
 429  420                  struct client_hash      *hash = NULL;
 430  421                  char                    *load_balance;
 431  422  
 432  423                  /*
 433  424                   * Allocate and initialize the mdi extensions
 434  425                   */
 435  426                  vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
 436  427                  hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
 437  428                      KM_SLEEP);
 438  429                  vh->vh_client_table = hash;
 439  430                  vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
 440  431                  (void) strcpy(vh->vh_class, class);
                   /*
                    * Load balancing defaults to round-robin; the
                    * LOAD_BALANCE_PROP property on vdip can select "none" or
                    * LBA instead.  Any other value leaves the default.
                    */
 441  432                  vh->vh_lb = LOAD_BALANCE_RR;
 442  433                  if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
 443  434                      0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
 444  435                          if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
 445  436                                  vh->vh_lb = LOAD_BALANCE_NONE;
 446  437                          } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
 447  438                                      == 0) {
 448  439                                  vh->vh_lb = LOAD_BALANCE_LBA;
 449  440                          }
 450  441                          ddi_prop_free(load_balance);
 451  442                  }
 452  443  
 453  444                  mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
 454  445                  mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
 455  446  
 456  447                  /*
 457  448                   * Store the vHCI ops vectors
 458  449                   */
 459  450                  vh->vh_dip = vdip;
 460  451                  vh->vh_ops = vops;
 461  452  
 462  453                  setup_vhci_cache(vh);
 463  454  
                   /* Append the new vHCI to the tail of the global list */
 464  455                  if (mdi_vhci_head == NULL) {
 465  456                          mdi_vhci_head = vh;
 466  457                  }
 467  458                  if (mdi_vhci_tail) {
 468  459                          mdi_vhci_tail->vh_next = vh;
 469  460                  }
 470  461                  mdi_vhci_tail = vh;
 471  462                  mdi_vhci_count++;
 472  463          }
 473  464  
 474  465          /*
 475  466           * Claim the devfs node as a vhci component
 476  467           */
 477  468          DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
 478  469  
 479  470          /*
 480  471           * Initialize our back reference from dev_info node
 481  472           */
 482  473          DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
 483  474          mutex_exit(&mdi_mutex);
 484  475          return (MDI_SUCCESS);
 485  476  }
 486  477  
 487  478  /*
 488  479   * mdi_vhci_unregister():
 489  480   *              Unregister a vHCI module from mpxio framework
 490  481   *              mdi_vhci_unregister() is called from the detach(9E) entrypoint
 491  482   *              of a vhci to unregister it from the framework.
            * Arguments:
            *              vdip    devinfo node of the vHCI being unregistered
            *              flags   not referenced by this function
 492  483   * Return Values:
 493  484   *              MDI_SUCCESS     vHCI unlinked from the global list and freed
 494  485   *              MDI_FAILURE     vdip has no vHCI handle, the handle is not on
            *                              the global list, vh_refcnt / vh_phci_count /
            *                              vh_client_count is non-zero, or
            *                              destroy_vhci_cache() did not succeed
 495  486   */
 496  487  /*ARGSUSED*/
 497  488  int
 498  489  mdi_vhci_unregister(dev_info_t *vdip, int flags)
 499  490  {
 500  491          mdi_vhci_t      *found, *vh, *prev = NULL;
 501  492  
 502  493          ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 503  494  
 504  495          /*
 505  496           * Check for invalid VHCI
 506  497           */
 507  498          if ((vh = i_devi_get_vhci(vdip)) == NULL)
 508  499                  return (MDI_FAILURE);
 509  500  
 510  501          /*
 511  502           * Scan the list of registered vHCIs for a match
            * (prev trails the cursor so the entry can be unlinked below)
 512  503           */
 513  504          mutex_enter(&mdi_mutex);
 514  505          for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
 515  506                  if (found == vh)
 516  507                          break;
 517  508                  prev = found;
 518  509          }
 519  510  
 520  511          if (found == NULL) {
 521  512                  mutex_exit(&mdi_mutex);
 522  513                  return (MDI_FAILURE);
 523  514          }
 524  515  
 525  516          /*
 526  517           * Check the vHCI, pHCI and client count. All the pHCIs and clients
 527  518           * should have been unregistered, before a vHCI can be
 528  519           * unregistered.
 529  520           */
 530  521          MDI_VHCI_PHCI_LOCK(vh);
 531  522          if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
 532  523                  MDI_VHCI_PHCI_UNLOCK(vh);
 533  524                  mutex_exit(&mdi_mutex);
 534  525                  return (MDI_FAILURE);
 535  526          }
 536  527          MDI_VHCI_PHCI_UNLOCK(vh);
 537  528  
           /* mdi_mutex is still held here; the vHCI is still on the list */
 538  529          if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
 539  530                  mutex_exit(&mdi_mutex);
 540  531                  return (MDI_FAILURE);
 541  532          }
 542  533  
 543  534          /*
 544  535           * Remove the vHCI from the global list
            * (prev is non-NULL whenever vh is not the list head)
 545  536           */
 546  537          if (vh == mdi_vhci_head) {
 547  538                  mdi_vhci_head = vh->vh_next;
 548  539          } else {
 549  540                  prev->vh_next = vh->vh_next;
 550  541          }
 551  542          if (vh == mdi_vhci_tail) {
 552  543                  mdi_vhci_tail = prev;
 553  544          }
 554  545          mdi_vhci_count--;
 555  546          mutex_exit(&mdi_mutex);
 556  547  
           /*
            * The vHCI is off the global list; clear the devinfo back
            * reference and release everything mdi_vhci_register() allocated.
            */
 557  548          vh->vh_ops = NULL;
 558  549          DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
 559  550          DEVI(vdip)->devi_mdi_xhci = NULL;
 560  551          kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
 561  552          kmem_free(vh->vh_client_table,
 562  553              mdi_client_table_size * sizeof (struct client_hash));
 563  554          mutex_destroy(&vh->vh_phci_mutex);
 564  555          mutex_destroy(&vh->vh_client_mutex);
 565  556  
 566  557          kmem_free(vh, sizeof (mdi_vhci_t));
 567  558          return (MDI_SUCCESS);
 568  559  }
 569  560  
 570  561  /*
 571  562   * i_mdi_vhci_class2vhci():
 572  563   *              Look for a matching vHCI module given a vHCI class name
            *              The global vHCI list is walked under mdi_mutex, comparing
            *              each vh_class with 'class' using strcmp().  The caller must
            *              not already hold mdi_mutex.  The mutex is dropped before
            *              returning and no reference is taken on the result here.
 573  564   * Return Values:
 574  565   *              Handle to a vHCI component
 575  566   *              NULL
 576  567   */
 577  568  static mdi_vhci_t *
 578  569  i_mdi_vhci_class2vhci(char *class)
 579  570  {
 580  571          mdi_vhci_t      *vh = NULL;
 581  572  
 582  573          ASSERT(!MUTEX_HELD(&mdi_mutex));
 583  574  
 584  575          mutex_enter(&mdi_mutex);
 585  576          for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 586  577                  if (strcmp(vh->vh_class, class) == 0) {
 587  578                          break;
 588  579                  }
 589  580          }
 590  581          mutex_exit(&mdi_mutex);
           /* vh is NULL here when the loop ran off the end without a match */
 591  582          return (vh);
 592  583  }
 593  584  
 594  585  /*
 595  586   * i_devi_get_vhci():
 596  587   *              Utility function to get the handle to a vHCI component
            *              Returns the devi_mdi_xhci back reference of vdip, cast to
            *              mdi_vhci_t *, when MDI_VHCI(vdip) is true.
            *              NOTE(review): the forward declaration of this function in
            *              this file is 'static', but the definition below omits the
            *              keyword.
 597  588   * Return Values:
 598  589   *              Handle to a vHCI component
 599  590   *              NULL
 600  591   */
 601  592  mdi_vhci_t *
 602  593  i_devi_get_vhci(dev_info_t *vdip)
 603  594  {
 604  595          mdi_vhci_t      *vh = NULL;
 605  596          if (MDI_VHCI(vdip)) {
 606  597                  vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
 607  598          }
 608  599          return (vh);
 609  600  }
 610  601  
 611  602  /*
 612  603   * mdi_phci_register():
 613  604   *              Register a pHCI module with mpxio framework
 614  605   *              mdi_phci_register() is called by pHCI drivers to register with
 615  606   *              the mpxio framework and a specific 'class_driver' vHCI.  The
 616  607   *              pHCI driver must call this interface as part of its attach(9e)
 617  608   *              handler.
 618  609   * Return Values:
 619  610   *              MDI_SUCCESS
 620  611   *              MDI_FAILURE
 621  612   */
 622  613  /*ARGSUSED*/
 623  614  int
 624  615  mdi_phci_register(char *class, dev_info_t *pdip, int flags)
 625  616  {
 626  617          mdi_phci_t              *ph;
 627  618          mdi_vhci_t              *vh;
 628  619          char                    *data;
 629  620  
 630  621          /*
 631  622           * Some subsystems, like fcp, perform pHCI registration from a
 632  623           * different thread than the one doing the pHCI attach(9E) - the
 633  624           * driver attach code is waiting for this other thread to complete.
 634  625           * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
 635  626           * (indicating that some thread has done an ndi_devi_enter of parent)
 636  627           * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
 637  628           */
 638  629          ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 639  630  
 640  631          /*
 641  632           * Check for mpxio-disable property. Enable mpxio if the property is
 642  633           * missing or not set to "yes".
 643  634           * If the property is set to "yes" then emit a brief message.
 644  635           */
 645  636          if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
 646  637              &data) == DDI_SUCCESS)) {
 647  638                  if (strcmp(data, "yes") == 0) {
 648  639                          MDI_DEBUG(1, (MDI_CONT, pdip,
 649  640                              "?multipath capabilities disabled via %s.conf.",
 650  641                              ddi_driver_name(pdip)));
 651  642                          ddi_prop_free(data);
 652  643                          return (MDI_FAILURE);
 653  644                  }
 654  645                  ddi_prop_free(data);
 655  646          }
 656  647  
 657  648          /*
 658  649           * Search for a matching vHCI
 659  650           */
 660  651          vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
 661  652          if (vh == NULL) {
 662  653                  return (MDI_FAILURE);
 663  654          }
 664  655  
 665  656          ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
 666  657          mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
 667  658          ph->ph_dip = pdip;
 668  659          ph->ph_vhci = vh;
 669  660          ph->ph_next = NULL;
 670  661          ph->ph_unstable = 0;
 671  662          ph->ph_vprivate = 0;
 672  663          cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
 673  664  
 674  665          MDI_PHCI_LOCK(ph);
 675  666          MDI_PHCI_SET_POWER_UP(ph);
 676  667          MDI_PHCI_UNLOCK(ph);
 677  668          DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
 678  669          DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
 679  670  
 680  671          vhcache_phci_add(vh->vh_config, ph);
 681  672  
 682  673          MDI_VHCI_PHCI_LOCK(vh);
 683  674          if (vh->vh_phci_head == NULL) {
 684  675                  vh->vh_phci_head = ph;
 685  676          }
 686  677          if (vh->vh_phci_tail) {
 687  678                  vh->vh_phci_tail->ph_next = ph;
 688  679          }
 689  680          vh->vh_phci_tail = ph;
 690  681          vh->vh_phci_count++;
 691  682          MDI_VHCI_PHCI_UNLOCK(vh);
 692  683  
 693  684          i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
 694  685          return (MDI_SUCCESS);
 695  686  }
 696  687  
 697  688  /*
 698  689   * mdi_phci_unregister():
 699  690   *              Unregister a pHCI module from mpxio framework
 700  691   *              mdi_phci_unregister() is called by the pHCI drivers from their
 701  692   *              detach(9E) handler to unregister their instances from the
 702  693   *              framework.
 703  694   * Return Values:
 704  695   *              MDI_SUCCESS
 705  696   *              MDI_FAILURE
 706  697   */
 707  698  /*ARGSUSED*/
 708  699  int
 709  700  mdi_phci_unregister(dev_info_t *pdip, int flags)
 710  701  {
 711  702          mdi_vhci_t              *vh;
 712  703          mdi_phci_t              *ph;
 713  704          mdi_phci_t              *tmp;
 714  705          mdi_phci_t              *prev = NULL;
 715  706          mdi_pathinfo_t          *pip;
 716  707  
 717  708          ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 718  709  
 719  710          ph = i_devi_get_phci(pdip);
 720  711          if (ph == NULL) {
 721  712                  MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
 722  713                  return (MDI_FAILURE);
 723  714          }
 724  715  
 725  716          vh = ph->ph_vhci;
 726  717          ASSERT(vh != NULL);
 727  718          if (vh == NULL) {
 728  719                  MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
 729  720                  return (MDI_FAILURE);
 730  721          }
 731  722  
 732  723          MDI_VHCI_PHCI_LOCK(vh);
 733  724          tmp = vh->vh_phci_head;
 734  725          while (tmp) {
 735  726                  if (tmp == ph) {
 736  727                          break;
 737  728                  }
 738  729                  prev = tmp;
 739  730                  tmp = tmp->ph_next;
 740  731          }
 741  732  
 742  733          if (ph == vh->vh_phci_head) {
 743  734                  vh->vh_phci_head = ph->ph_next;
 744  735          } else {
 745  736                  prev->ph_next = ph->ph_next;
 746  737          }
 747  738  
 748  739          if (ph == vh->vh_phci_tail) {
 749  740                  vh->vh_phci_tail = prev;
 750  741          }
 751  742  
 752  743          vh->vh_phci_count--;
 753  744          MDI_VHCI_PHCI_UNLOCK(vh);
 754  745  
 755  746          /* Walk remaining pathinfo nodes and disassociate them from pHCI */
 756  747          MDI_PHCI_LOCK(ph);
 757  748          for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
 758  749              pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
 759  750                  MDI_PI(pip)->pi_phci = NULL;
 760  751          MDI_PHCI_UNLOCK(ph);
 761  752  
 762  753          i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
 763  754              ESC_DDI_INITIATOR_UNREGISTER);
 764  755          vhcache_phci_remove(vh->vh_config, ph);
 765  756          cv_destroy(&ph->ph_unstable_cv);
 766  757          mutex_destroy(&ph->ph_mutex);
 767  758          kmem_free(ph, sizeof (mdi_phci_t));
 768  759          DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
 769  760          DEVI(pdip)->devi_mdi_xhci = NULL;
 770  761          return (MDI_SUCCESS);
 771  762  }
 772  763  
 773  764  /*
 774  765   * i_devi_get_phci():
 775  766   *              Utility function to return the phci extensions.
 776  767   */
 777  768  static mdi_phci_t *
 778  769  i_devi_get_phci(dev_info_t *pdip)
 779  770  {
 780  771          mdi_phci_t      *ph = NULL;
 781  772  
 782  773          if (MDI_PHCI(pdip)) {
 783  774                  ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
 784  775          }
 785  776          return (ph);
 786  777  }
 787  778  
 788  779  /*
 789  780   * Single thread mdi entry into devinfo node for modifying its children.
 790  781   * If necessary we perform an ndi_devi_enter of the vHCI before doing
 791  782   * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 792  783   * for the vHCI and one for the pHCI.
 793  784   */
 794  785  void
 795  786  mdi_devi_enter(dev_info_t *phci_dip, int *circular)
 796  787  {
 797  788          dev_info_t      *vdip;
 798  789          int             vcircular, pcircular;
 799  790  
 800  791          /* Verify calling context */
 801  792          ASSERT(MDI_PHCI(phci_dip));
 802  793          vdip = mdi_devi_get_vdip(phci_dip);
 803  794          ASSERT(vdip);                   /* A pHCI always has a vHCI */
 804  795  
 805  796          /*
 806  797           * If pHCI is detaching then the framework has already entered the
 807  798           * vHCI on a thread that went down the code path leading to
 808  799           * detach_node().  This framework enter of the vHCI during pHCI
 809  800           * detach is done to avoid deadlock with vHCI power management
 810  801           * operations which enter the vHCI and then enter down the path
 811  802           * to the pHCI. If pHCI is detaching then we piggyback this call's
 812  803           * enter of the vHCI on the framework's vHCI enter that has already
 813  804           * occurred - this is OK because we know that the framework thread
 814  805           * doing detach is waiting for our completion.
 815  806           *
 816  807           * We should check DEVI_IS_DETACHING under an enter of the parent to
 817  808           * avoid a race with detach - but we can't do that because the framework
 818  809           * has already entered the parent, so we have some complexity instead.
 819  810           */
 820  811          for (;;) {
 821  812                  if (ndi_devi_tryenter(vdip, &vcircular)) {
 822  813                          ASSERT(vcircular != -1);
 823  814                          if (DEVI_IS_DETACHING(phci_dip)) {
 824  815                                  ndi_devi_exit(vdip, vcircular);
 825  816                                  vcircular = -1;
 826  817                          }
 827  818                          break;
 828  819                  } else if (DEVI_IS_DETACHING(phci_dip)) {
 829  820                          vcircular = -1;
 830  821                          break;
 831  822                  } else if (servicing_interrupt()) {
 832  823                          /*
 833  824                           * Don't delay an interrupt (and ensure adaptive
 834  825                           * mutex inversion support).
 835  826                           */
 836  827                          ndi_devi_enter(vdip, &vcircular);
 837  828                          break;
 838  829                  } else {
 839  830                          delay_random(mdi_delay);
 840  831                  }
 841  832          }
 842  833  
 843  834          ndi_devi_enter(phci_dip, &pcircular);
 844  835          *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 845  836  }
 846  837  
 847  838  /*
 848  839   * Attempt to mdi_devi_enter.
 849  840   */
 850  841  int
 851  842  mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
 852  843  {
 853  844          dev_info_t      *vdip;
 854  845          int             vcircular, pcircular;
 855  846  
 856  847          /* Verify calling context */
 857  848          ASSERT(MDI_PHCI(phci_dip));
 858  849          vdip = mdi_devi_get_vdip(phci_dip);
 859  850          ASSERT(vdip);                   /* A pHCI always has a vHCI */
 860  851  
 861  852          if (ndi_devi_tryenter(vdip, &vcircular)) {
 862  853                  if (ndi_devi_tryenter(phci_dip, &pcircular)) {
 863  854                          *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 864  855                          return (1);     /* locked */
 865  856                  }
 866  857                  ndi_devi_exit(vdip, vcircular);
 867  858          }
 868  859          return (0);                     /* busy */
 869  860  }
 870  861  
 871  862  /*
 872  863   * Release mdi_devi_enter or successful mdi_devi_tryenter.
 873  864   */
 874  865  void
 875  866  mdi_devi_exit(dev_info_t *phci_dip, int circular)
 876  867  {
 877  868          dev_info_t      *vdip;
 878  869          int             vcircular, pcircular;
 879  870  
 880  871          /* Verify calling context */
 881  872          ASSERT(MDI_PHCI(phci_dip));
 882  873          vdip = mdi_devi_get_vdip(phci_dip);
 883  874          ASSERT(vdip);                   /* A pHCI always has a vHCI */
 884  875  
 885  876          /* extract two circular recursion values from single int */
 886  877          pcircular = (short)(circular & 0xFFFF);
 887  878          vcircular = (short)((circular >> 16) & 0xFFFF);
 888  879  
 889  880          ndi_devi_exit(phci_dip, pcircular);
 890  881          if (vcircular != -1)
 891  882                  ndi_devi_exit(vdip, vcircular);
 892  883  }
 893  884  
 894  885  /*
 895  886   * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 896  887   * around a pHCI driver's calls to mdi_pi_online/offline, after holding
 897  888   * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 898  889   * with vHCI power management code during path online/offline.  Each
 899  890   * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 900  891   * occur within the scope of an active mdi_devi_enter that establishes the
 901  892   * circular value.
 902  893   */
 903  894  void
 904  895  mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
 905  896  {
 906  897          int             pcircular;
 907  898  
 908  899          /* Verify calling context */
 909  900          ASSERT(MDI_PHCI(phci_dip));
 910  901  
 911  902          /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
 912  903          ndi_hold_devi(phci_dip);
 913  904  
 914  905          pcircular = (short)(circular & 0xFFFF);
 915  906          ndi_devi_exit(phci_dip, pcircular);
 916  907  }
 917  908  
 918  909  void
 919  910  mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
 920  911  {
 921  912          int             pcircular;
 922  913  
 923  914          /* Verify calling context */
 924  915          ASSERT(MDI_PHCI(phci_dip));
 925  916  
 926  917          ndi_devi_enter(phci_dip, &pcircular);
 927  918  
 928  919          /* Drop hold from mdi_devi_exit_phci. */
 929  920          ndi_rele_devi(phci_dip);
 930  921  
 931  922          /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
 932  923          ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
 933  924  }
 934  925  
 935  926  /*
 936  927   * mdi_devi_get_vdip():
 937  928   *              given a pHCI dip return vHCI dip
 938  929   */
 939  930  dev_info_t *
 940  931  mdi_devi_get_vdip(dev_info_t *pdip)
 941  932  {
 942  933          mdi_phci_t      *ph;
 943  934  
 944  935          ph = i_devi_get_phci(pdip);
 945  936          if (ph && ph->ph_vhci)
 946  937                  return (ph->ph_vhci->vh_dip);
 947  938          return (NULL);
 948  939  }
 949  940  
 950  941  /*
 951  942   * mdi_devi_pdip_entered():
 952  943   *              Return 1 if we are vHCI and have done an ndi_devi_enter
 953  944   *              of a pHCI
 954  945   */
 955  946  int
 956  947  mdi_devi_pdip_entered(dev_info_t *vdip)
 957  948  {
 958  949          mdi_vhci_t      *vh;
 959  950          mdi_phci_t      *ph;
 960  951  
 961  952          vh = i_devi_get_vhci(vdip);
 962  953          if (vh == NULL)
 963  954                  return (0);
 964  955  
 965  956          MDI_VHCI_PHCI_LOCK(vh);
 966  957          ph = vh->vh_phci_head;
 967  958          while (ph) {
 968  959                  if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
 969  960                          MDI_VHCI_PHCI_UNLOCK(vh);
 970  961                          return (1);
 971  962                  }
 972  963                  ph = ph->ph_next;
 973  964          }
 974  965          MDI_VHCI_PHCI_UNLOCK(vh);
 975  966          return (0);
 976  967  }
 977  968  
 978  969  /*
 979  970   * mdi_phci_path2devinfo():
 980  971   *              Utility function to search for a valid phci device given
 981  972   *              the devfs pathname.
 982  973   */
 983  974  dev_info_t *
 984  975  mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
 985  976  {
 986  977          char            *temp_pathname;
 987  978          mdi_vhci_t      *vh;
 988  979          mdi_phci_t      *ph;
 989  980          dev_info_t      *pdip = NULL;
 990  981  
 991  982          vh = i_devi_get_vhci(vdip);
 992  983          ASSERT(vh != NULL);
 993  984  
 994  985          if (vh == NULL) {
 995  986                  /*
 996  987                   * Invalid vHCI component, return failure
 997  988                   */
 998  989                  return (NULL);
 999  990          }
1000  991  
1001  992          temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1002  993          MDI_VHCI_PHCI_LOCK(vh);
1003  994          ph = vh->vh_phci_head;
1004  995          while (ph != NULL) {
1005  996                  pdip = ph->ph_dip;
1006  997                  ASSERT(pdip != NULL);
1007  998                  *temp_pathname = '\0';
1008  999                  (void) ddi_pathname(pdip, temp_pathname);
1009 1000                  if (strcmp(temp_pathname, pathname) == 0) {
1010 1001                          break;
1011 1002                  }
1012 1003                  ph = ph->ph_next;
1013 1004          }
1014 1005          if (ph == NULL) {
1015 1006                  pdip = NULL;
1016 1007          }
1017 1008          MDI_VHCI_PHCI_UNLOCK(vh);
1018 1009          kmem_free(temp_pathname, MAXPATHLEN);
1019 1010          return (pdip);
1020 1011  }
1021 1012  
1022 1013  /*
1023 1014   * mdi_phci_get_path_count():
1024 1015   *              get number of path information nodes associated with a given
1025 1016   *              pHCI device.
1026 1017   */
1027 1018  int
1028 1019  mdi_phci_get_path_count(dev_info_t *pdip)
1029 1020  {
1030 1021          mdi_phci_t      *ph;
1031 1022          int             count = 0;
1032 1023  
1033 1024          ph = i_devi_get_phci(pdip);
1034 1025          if (ph != NULL) {
1035 1026                  count = ph->ph_path_count;
1036 1027          }
1037 1028          return (count);
1038 1029  }
1039 1030  
1040 1031  /*
1041 1032   * i_mdi_phci_lock():
1042 1033   *              Lock a pHCI device
1043 1034   * Return Values:
1044 1035   *              None
1045 1036   * Note:
1046 1037   *              The default locking order is:
1047 1038   *              _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1048 1039   *              But there are a number of situations where locks need to be
1049 1040   *              grabbed in reverse order.  This routine implements try and lock
1050 1041   *              mechanism depending on the requested parameter option.
1051 1042   */
1052 1043  static void
1053 1044  i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1054 1045  {
1055 1046          if (pip) {
1056 1047                  /* Reverse locking is requested. */
1057 1048                  while (MDI_PHCI_TRYLOCK(ph) == 0) {
1058 1049                          if (servicing_interrupt()) {
1059 1050                                  MDI_PI_HOLD(pip);
1060 1051                                  MDI_PI_UNLOCK(pip);
1061 1052                                  MDI_PHCI_LOCK(ph);
1062 1053                                  MDI_PI_LOCK(pip);
1063 1054                                  MDI_PI_RELE(pip);
1064 1055                                  break;
1065 1056                          } else {
1066 1057                                  /*
1067 1058                                   * tryenter failed. Try to grab again
1068 1059                                   * after a small delay
1069 1060                                   */
1070 1061                                  MDI_PI_HOLD(pip);
1071 1062                                  MDI_PI_UNLOCK(pip);
1072 1063                                  delay_random(mdi_delay);
1073 1064                                  MDI_PI_LOCK(pip);
1074 1065                                  MDI_PI_RELE(pip);
1075 1066                          }
1076 1067                  }
1077 1068          } else {
1078 1069                  MDI_PHCI_LOCK(ph);
1079 1070          }
1080 1071  }
1081 1072  
1082 1073  /*
1083 1074   * i_mdi_phci_unlock():
1084 1075   *              Unlock the pHCI component
1085 1076   */
1086 1077  static void
1087 1078  i_mdi_phci_unlock(mdi_phci_t *ph)
1088 1079  {
1089 1080          MDI_PHCI_UNLOCK(ph);
1090 1081  }
1091 1082  
1092 1083  /*
1093 1084   * i_mdi_devinfo_create():
1094 1085   *              create client device's devinfo node
1095 1086   * Return Values:
1096 1087   *              dev_info
1097 1088   *              NULL
1098 1089   * Notes:
1099 1090   */
1100 1091  static dev_info_t *
1101 1092  i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1102 1093          char **compatible, int ncompatible)
1103 1094  {
1104 1095          dev_info_t *cdip = NULL;
1105 1096  
1106 1097          ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1107 1098  
1108 1099          /* Verify for duplicate entry */
1109 1100          cdip = i_mdi_devinfo_find(vh, name, guid);
1110 1101          ASSERT(cdip == NULL);
1111 1102          if (cdip) {
1112 1103                  cmn_err(CE_WARN,
1113 1104                      "i_mdi_devinfo_create: client %s@%s already exists",
1114 1105                          name ? name : "", guid ? guid : "");
1115 1106          }
1116 1107  
1117 1108          ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1118 1109          if (cdip == NULL)
1119 1110                  goto fail;
1120 1111  
1121 1112          /*
1122 1113           * Create component type and Global unique identifier
1123 1114           * properties
1124 1115           */
1125 1116          if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1126 1117              MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1127 1118                  goto fail;
1128 1119          }
1129 1120  
1130 1121          /* Decorate the node with compatible property */
1131 1122          if (compatible &&
1132 1123              (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1133 1124              "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1134 1125                  goto fail;
1135 1126          }
1136 1127  
1137 1128          return (cdip);
1138 1129  
1139 1130  fail:
1140 1131          if (cdip) {
1141 1132                  (void) ndi_prop_remove_all(cdip);
1142 1133                  (void) ndi_devi_free(cdip);
1143 1134          }
1144 1135          return (NULL);
1145 1136  }
1146 1137  
1147 1138  /*
1148 1139   * i_mdi_devinfo_find():
1149 1140   *              Find a matching devinfo node for given client node name
1150 1141   *              and its guid.
1151 1142   * Return Values:
1152 1143   *              Handle to a dev_info node or NULL
1153 1144   */
1154 1145  static dev_info_t *
1155 1146  i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1156 1147  {
1157 1148          char                    *data;
1158 1149          dev_info_t              *cdip = NULL;
1159 1150          dev_info_t              *ndip = NULL;
1160 1151          int                     circular;
1161 1152  
1162 1153          ndi_devi_enter(vh->vh_dip, &circular);
1163 1154          ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1164 1155          while ((cdip = ndip) != NULL) {
1165 1156                  ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1166 1157  
1167 1158                  if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1168 1159                          continue;
1169 1160                  }
1170 1161  
1171 1162                  if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1172 1163                      DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1173 1164                      &data) != DDI_PROP_SUCCESS) {
1174 1165                          continue;
1175 1166                  }
1176 1167  
1177 1168                  if (strcmp(data, guid) != 0) {
1178 1169                          ddi_prop_free(data);
1179 1170                          continue;
1180 1171                  }
1181 1172                  ddi_prop_free(data);
1182 1173                  break;
1183 1174          }
1184 1175          ndi_devi_exit(vh->vh_dip, circular);
1185 1176          return (cdip);
1186 1177  }
1187 1178  
1188 1179  /*
  
    | 
      ↓ open down ↓ | 
    1140 lines elided | 
    
      ↑ open up ↑ | 
  
1189 1180   * i_mdi_devinfo_remove():
1190 1181   *              Remove a client device node
1191 1182   */
1192 1183  static int
1193 1184  i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1194 1185  {
1195 1186          int     rv = MDI_SUCCESS;
1196 1187  
1197 1188          if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1198 1189              (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1199      -                rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
     1190 +                int nflags = NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE;
     1191 +
     1192 +                if (flags & MDI_CLIENT_FLAGS_NO_EVENT)
     1193 +                        nflags |= NDI_NO_EVENT;
     1194 +
     1195 +                rv = ndi_devi_offline(cdip, nflags);
1200 1196                  if (rv != NDI_SUCCESS) {
1201 1197                          MDI_DEBUG(1, (MDI_NOTE, cdip,
1202 1198                              "!failed: cdip %p", (void *)cdip));
1203 1199                  }
1204 1200                  /*
1205 1201                   * Convert to MDI error code
1206 1202                   */
1207 1203                  switch (rv) {
1208 1204                  case NDI_SUCCESS:
1209 1205                          rv = MDI_SUCCESS;
1210 1206                          break;
1211 1207                  case NDI_BUSY:
1212 1208                          rv = MDI_BUSY;
1213 1209                          break;
1214 1210                  default:
1215 1211                          rv = MDI_FAILURE;
1216 1212                          break;
1217 1213                  }
1218 1214          }
1219 1215          return (rv);
1220 1216  }
1221 1217  
1222 1218  /*
1223 1219   * i_devi_get_client()
1224 1220   *              Utility function to get mpxio component extensions
1225 1221   */
1226 1222  static mdi_client_t *
1227 1223  i_devi_get_client(dev_info_t *cdip)
1228 1224  {
1229 1225          mdi_client_t    *ct = NULL;
1230 1226  
1231 1227          if (MDI_CLIENT(cdip)) {
1232 1228                  ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1233 1229          }
1234 1230          return (ct);
1235 1231  }
1236 1232  
1237 1233  /*
1238 1234   * i_mdi_is_child_present():
1239 1235   *              Search for the presence of client device dev_info node
1240 1236   */
1241 1237  static int
1242 1238  i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1243 1239  {
1244 1240          int             rv = MDI_FAILURE;
1245 1241          struct dev_info *dip;
1246 1242          int             circular;
1247 1243  
1248 1244          ndi_devi_enter(vdip, &circular);
1249 1245          dip = DEVI(vdip)->devi_child;
1250 1246          while (dip) {
1251 1247                  if (dip == DEVI(cdip)) {
1252 1248                          rv = MDI_SUCCESS;
1253 1249                          break;
1254 1250                  }
1255 1251                  dip = dip->devi_sibling;
1256 1252          }
1257 1253          ndi_devi_exit(vdip, circular);
1258 1254          return (rv);
1259 1255  }
1260 1256  
1261 1257  
1262 1258  /*
1263 1259   * i_mdi_client_lock():
1264 1260   *              Grab client component lock
1265 1261   * Return Values:
1266 1262   *              None
1267 1263   * Note:
1268 1264   *              The default locking order is:
1269 1265   *              _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1270 1266   *              But there are a number of situations where locks need to be
1271 1267   *              grabbed in reverse order.  This routine implements try and lock
1272 1268   *              mechanism depending on the requested parameter option.
1273 1269   */
1274 1270  static void
1275 1271  i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1276 1272  {
1277 1273          if (pip) {
1278 1274                  /*
1279 1275                   * Reverse locking is requested.
1280 1276                   */
1281 1277                  while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1282 1278                          if (servicing_interrupt()) {
1283 1279                                  MDI_PI_HOLD(pip);
1284 1280                                  MDI_PI_UNLOCK(pip);
1285 1281                                  MDI_CLIENT_LOCK(ct);
1286 1282                                  MDI_PI_LOCK(pip);
1287 1283                                  MDI_PI_RELE(pip);
1288 1284                                  break;
1289 1285                          } else {
1290 1286                                  /*
1291 1287                                   * tryenter failed. Try to grab again
1292 1288                                   * after a small delay
1293 1289                                   */
1294 1290                                  MDI_PI_HOLD(pip);
1295 1291                                  MDI_PI_UNLOCK(pip);
1296 1292                                  delay_random(mdi_delay);
1297 1293                                  MDI_PI_LOCK(pip);
1298 1294                                  MDI_PI_RELE(pip);
1299 1295                          }
1300 1296                  }
1301 1297          } else {
1302 1298                  MDI_CLIENT_LOCK(ct);
1303 1299          }
1304 1300  }
1305 1301  
1306 1302  /*
1307 1303   * i_mdi_client_unlock():
1308 1304   *              Unlock a client component
1309 1305   */
1310 1306  static void
1311 1307  i_mdi_client_unlock(mdi_client_t *ct)
1312 1308  {
1313 1309          MDI_CLIENT_UNLOCK(ct);
1314 1310  }
1315 1311  
1316 1312  /*
1317 1313   * i_mdi_client_alloc():
1318 1314   *              Allocate and initialize a client structure.  Caller should
1319 1315   *              hold the vhci client lock.
1320 1316   * Return Values:
1321 1317   *              Handle to a client component
1322 1318   */
1323 1319  /*ARGSUSED*/
1324 1320  static mdi_client_t *
1325 1321  i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1326 1322  {
1327 1323          mdi_client_t    *ct;
1328 1324  
1329 1325          ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1330 1326  
1331 1327          /*
1332 1328           * Allocate and initialize a component structure.
1333 1329           */
1334 1330          ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1335 1331          mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1336 1332          ct->ct_hnext = NULL;
1337 1333          ct->ct_hprev = NULL;
1338 1334          ct->ct_dip = NULL;
1339 1335          ct->ct_vhci = vh;
1340 1336          ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1341 1337          (void) strcpy(ct->ct_drvname, name);
1342 1338          ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1343 1339          (void) strcpy(ct->ct_guid, lguid);
1344 1340          ct->ct_cprivate = NULL;
1345 1341          ct->ct_vprivate = NULL;
1346 1342          ct->ct_flags = 0;
1347 1343          ct->ct_state = MDI_CLIENT_STATE_FAILED;
1348 1344          MDI_CLIENT_LOCK(ct);
1349 1345          MDI_CLIENT_SET_OFFLINE(ct);
1350 1346          MDI_CLIENT_SET_DETACH(ct);
1351 1347          MDI_CLIENT_SET_POWER_UP(ct);
1352 1348          MDI_CLIENT_UNLOCK(ct);
1353 1349          ct->ct_failover_flags = 0;
1354 1350          ct->ct_failover_status = 0;
1355 1351          cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1356 1352          ct->ct_unstable = 0;
1357 1353          cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1358 1354          cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1359 1355          ct->ct_lb = vh->vh_lb;
1360 1356          ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1361 1357          ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1362 1358          ct->ct_path_count = 0;
1363 1359          ct->ct_path_head = NULL;
1364 1360          ct->ct_path_tail = NULL;
1365 1361          ct->ct_path_last = NULL;
1366 1362  
1367 1363          /*
1368 1364           * Add this client component to our client hash queue
1369 1365           */
1370 1366          i_mdi_client_enlist_table(vh, ct);
1371 1367          return (ct);
1372 1368  }
1373 1369  
1374 1370  /*
1375 1371   * i_mdi_client_enlist_table():
1376 1372   *              Attach the client device to the client hash table. Caller
1377 1373   *              should hold the vhci client lock.
1378 1374   */
1379 1375  static void
1380 1376  i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1381 1377  {
1382 1378          int                     index;
1383 1379          struct client_hash      *head;
1384 1380  
1385 1381          ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1386 1382  
1387 1383          index = i_mdi_get_hash_key(ct->ct_guid);
1388 1384          head = &vh->vh_client_table[index];
1389 1385          ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1390 1386          head->ct_hash_head = ct;
1391 1387          head->ct_hash_count++;
1392 1388          vh->vh_client_count++;
1393 1389  }
1394 1390  
1395 1391  /*
1396 1392   * i_mdi_client_delist_table():
1397 1393   *              Attach the client device to the client hash table.
1398 1394   *              Caller should hold the vhci client lock.
1399 1395   */
1400 1396  static void
1401 1397  i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1402 1398  {
1403 1399          int                     index;
1404 1400          char                    *guid;
1405 1401          struct client_hash      *head;
1406 1402          mdi_client_t            *next;
1407 1403          mdi_client_t            *last;
1408 1404  
1409 1405          ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1410 1406  
1411 1407          guid = ct->ct_guid;
1412 1408          index = i_mdi_get_hash_key(guid);
1413 1409          head = &vh->vh_client_table[index];
1414 1410  
1415 1411          last = NULL;
1416 1412          next = (mdi_client_t *)head->ct_hash_head;
1417 1413          while (next != NULL) {
1418 1414                  if (next == ct) {
1419 1415                          break;
1420 1416                  }
1421 1417                  last = next;
1422 1418                  next = next->ct_hnext;
1423 1419          }
1424 1420  
1425 1421          if (next) {
1426 1422                  head->ct_hash_count--;
1427 1423                  if (last == NULL) {
1428 1424                          head->ct_hash_head = ct->ct_hnext;
1429 1425                  } else {
1430 1426                          last->ct_hnext = ct->ct_hnext;
1431 1427                  }
1432 1428                  ct->ct_hnext = NULL;
1433 1429                  vh->vh_client_count--;
1434 1430          }
1435 1431  }
1436 1432  
1437 1433  
1438 1434  /*
1439 1435   * i_mdi_client_free():
1440 1436   *              Free a client component
1441 1437   */
1442 1438  static int
1443 1439  i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1444 1440  {
1445 1441          int             rv = MDI_SUCCESS;
1446 1442          int             flags = ct->ct_flags;
1447 1443          dev_info_t      *cdip;
1448 1444          dev_info_t      *vdip;
1449 1445  
1450 1446          ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1451 1447  
1452 1448          vdip = vh->vh_dip;
1453 1449          cdip = ct->ct_dip;
1454 1450  
1455 1451          (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1456 1452          DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1457 1453          DEVI(cdip)->devi_mdi_client = NULL;
1458 1454  
1459 1455          /*
1460 1456           * Clear out back ref. to dev_info_t node
1461 1457           */
1462 1458          ct->ct_dip = NULL;
1463 1459  
1464 1460          /*
1465 1461           * Remove this client from our hash queue
1466 1462           */
1467 1463          i_mdi_client_delist_table(vh, ct);
1468 1464  
1469 1465          /*
1470 1466           * Uninitialize and free the component
1471 1467           */
1472 1468          kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1473 1469          kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1474 1470          kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1475 1471          cv_destroy(&ct->ct_failover_cv);
1476 1472          cv_destroy(&ct->ct_unstable_cv);
1477 1473          cv_destroy(&ct->ct_powerchange_cv);
1478 1474          mutex_destroy(&ct->ct_mutex);
1479 1475          kmem_free(ct, sizeof (*ct));
1480 1476  
1481 1477          if (cdip != NULL) {
1482 1478                  MDI_VHCI_CLIENT_UNLOCK(vh);
1483 1479                  (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1484 1480                  MDI_VHCI_CLIENT_LOCK(vh);
1485 1481          }
1486 1482          return (rv);
1487 1483  }
1488 1484  
1489 1485  /*
1490 1486   * i_mdi_client_find():
1491 1487   *              Find the client structure corresponding to a given guid
1492 1488   *              Caller should hold the vhci client lock.
1493 1489   */
1494 1490  static mdi_client_t *
1495 1491  i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1496 1492  {
1497 1493          int                     index;
1498 1494          struct client_hash      *head;
1499 1495          mdi_client_t            *ct;
1500 1496  
1501 1497          ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1502 1498  
1503 1499          index = i_mdi_get_hash_key(guid);
1504 1500          head = &vh->vh_client_table[index];
1505 1501  
1506 1502          ct = head->ct_hash_head;
1507 1503          while (ct != NULL) {
1508 1504                  if (strcmp(ct->ct_guid, guid) == 0 &&
1509 1505                      (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1510 1506                          break;
1511 1507                  }
1512 1508                  ct = ct->ct_hnext;
1513 1509          }
1514 1510          return (ct);
1515 1511  }
1516 1512  
1517 1513  /*
1518 1514   * i_mdi_client_update_state():
1519 1515   *              Compute and update client device state
1520 1516   * Notes:
1521 1517   *              A client device can be in any of three possible states:
1522 1518   *
1523 1519   *              MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1524 1520   *              one online/standby paths. Can tolerate failures.
1525 1521   *              MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1526 1522   *              no alternate paths available as standby. A failure on the online
1527 1523   *              would result in loss of access to device data.
1528 1524   *              MDI_CLIENT_STATE_FAILED - Client device in failed state with
1529 1525   *              no paths available to access the device.
1530 1526   */
1531 1527  static void
1532 1528  i_mdi_client_update_state(mdi_client_t *ct)
1533 1529  {
1534 1530          int state;
1535 1531  
1536 1532          ASSERT(MDI_CLIENT_LOCKED(ct));
1537 1533          state = i_mdi_client_compute_state(ct, NULL);
1538 1534          MDI_CLIENT_SET_STATE(ct, state);
1539 1535  }
1540 1536  
1541 1537  /*
1542 1538   * i_mdi_client_compute_state():
1543 1539   *              Compute client device state
1544 1540   *
1545 1541   *              mdi_phci_t *    Pointer to pHCI structure which should
1546 1542   *                              while computing the new value.  Used by
1547 1543   *                              i_mdi_phci_offline() to find the new
1548 1544   *                              client state after DR of a pHCI.
1549 1545   */
1550 1546  static int
1551 1547  i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1552 1548  {
1553 1549          int             state;
1554 1550          int             online_count = 0;
1555 1551          int             standby_count = 0;
1556 1552          mdi_pathinfo_t  *pip, *next;
1557 1553  
1558 1554          ASSERT(MDI_CLIENT_LOCKED(ct));
1559 1555          pip = ct->ct_path_head;
1560 1556          while (pip != NULL) {
1561 1557                  MDI_PI_LOCK(pip);
1562 1558                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1563 1559                  if (MDI_PI(pip)->pi_phci == ph) {
1564 1560                          MDI_PI_UNLOCK(pip);
1565 1561                          pip = next;
1566 1562                          continue;
1567 1563                  }
1568 1564  
1569 1565                  if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1570 1566                                  == MDI_PATHINFO_STATE_ONLINE)
1571 1567                          online_count++;
1572 1568                  else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1573 1569                                  == MDI_PATHINFO_STATE_STANDBY)
1574 1570                          standby_count++;
1575 1571                  MDI_PI_UNLOCK(pip);
1576 1572                  pip = next;
1577 1573          }
1578 1574  
1579 1575          if (online_count == 0) {
1580 1576                  if (standby_count == 0) {
1581 1577                          state = MDI_CLIENT_STATE_FAILED;
1582 1578                          MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1583 1579                              "client state failed: ct = %p", (void *)ct));
1584 1580                  } else if (standby_count == 1) {
1585 1581                          state = MDI_CLIENT_STATE_DEGRADED;
1586 1582                  } else {
1587 1583                          state = MDI_CLIENT_STATE_OPTIMAL;
1588 1584                  }
1589 1585          } else if (online_count == 1) {
1590 1586                  if (standby_count == 0) {
1591 1587                          state = MDI_CLIENT_STATE_DEGRADED;
1592 1588                  } else {
1593 1589                          state = MDI_CLIENT_STATE_OPTIMAL;
1594 1590                  }
1595 1591          } else {
1596 1592                  state = MDI_CLIENT_STATE_OPTIMAL;
1597 1593          }
1598 1594          return (state);
1599 1595  }
1600 1596  
1601 1597  /*
1602 1598   * i_mdi_client2devinfo():
1603 1599   *              Utility function
1604 1600   */
1605 1601  dev_info_t *
1606 1602  i_mdi_client2devinfo(mdi_client_t *ct)
1607 1603  {
1608 1604          return (ct->ct_dip);
1609 1605  }
1610 1606  
1611 1607  /*
1612 1608   * mdi_client_path2_devinfo():
1613 1609   *              Given the parent devinfo and child devfs pathname, search for
1614 1610   *              a valid devfs node handle.
1615 1611   */
1616 1612  dev_info_t *
1617 1613  mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1618 1614  {
1619 1615          dev_info_t      *cdip = NULL;
1620 1616          dev_info_t      *ndip = NULL;
1621 1617          char            *temp_pathname;
1622 1618          int             circular;
1623 1619  
1624 1620          /*
1625 1621           * Allocate temp buffer
1626 1622           */
1627 1623          temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1628 1624  
1629 1625          /*
1630 1626           * Lock parent against changes
1631 1627           */
1632 1628          ndi_devi_enter(vdip, &circular);
1633 1629          ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1634 1630          while ((cdip = ndip) != NULL) {
1635 1631                  ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1636 1632  
1637 1633                  *temp_pathname = '\0';
1638 1634                  (void) ddi_pathname(cdip, temp_pathname);
1639 1635                  if (strcmp(temp_pathname, pathname) == 0) {
1640 1636                          break;
1641 1637                  }
1642 1638          }
1643 1639          /*
1644 1640           * Release devinfo lock
1645 1641           */
1646 1642          ndi_devi_exit(vdip, circular);
1647 1643  
1648 1644          /*
1649 1645           * Free the temp buffer
1650 1646           */
1651 1647          kmem_free(temp_pathname, MAXPATHLEN);
1652 1648          return (cdip);
1653 1649  }
1654 1650  
1655 1651  /*
1656 1652   * mdi_client_get_path_count():
1657 1653   *              Utility function to get number of path information nodes
1658 1654   *              associated with a given client device.
1659 1655   */
1660 1656  int
1661 1657  mdi_client_get_path_count(dev_info_t *cdip)
1662 1658  {
1663 1659          mdi_client_t    *ct;
1664 1660          int             count = 0;
1665 1661  
1666 1662          ct = i_devi_get_client(cdip);
1667 1663          if (ct != NULL) {
1668 1664                  count = ct->ct_path_count;
1669 1665          }
1670 1666          return (count);
1671 1667  }
1672 1668  
1673 1669  
1674 1670  /*
1675 1671   * i_mdi_get_hash_key():
1676 1672   *              Create a hash using strings as keys
1677 1673   *
1678 1674   */
1679 1675  static int
1680 1676  i_mdi_get_hash_key(char *str)
1681 1677  {
1682 1678          uint32_t        g, hash = 0;
1683 1679          char            *p;
1684 1680  
1685 1681          for (p = str; *p != '\0'; p++) {
1686 1682                  g = *p;
1687 1683                  hash += g;
1688 1684          }
1689 1685          return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1690 1686  }
1691 1687  
1692 1688  /*
1693 1689   * mdi_get_lb_policy():
1694 1690   *              Get current load balancing policy for a given client device
1695 1691   */
1696 1692  client_lb_t
1697 1693  mdi_get_lb_policy(dev_info_t *cdip)
1698 1694  {
1699 1695          client_lb_t     lb = LOAD_BALANCE_NONE;
1700 1696          mdi_client_t    *ct;
1701 1697  
1702 1698          ct = i_devi_get_client(cdip);
1703 1699          if (ct != NULL) {
1704 1700                  lb = ct->ct_lb;
1705 1701          }
1706 1702          return (lb);
1707 1703  }
1708 1704  
1709 1705  /*
1710 1706   * mdi_set_lb_region_size():
1711 1707   *              Set current region size for the load-balance
1712 1708   */
1713 1709  int
1714 1710  mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1715 1711  {
1716 1712          mdi_client_t    *ct;
1717 1713          int             rv = MDI_FAILURE;
1718 1714  
1719 1715          ct = i_devi_get_client(cdip);
1720 1716          if (ct != NULL && ct->ct_lb_args != NULL) {
1721 1717                  ct->ct_lb_args->region_size = region_size;
1722 1718                  rv = MDI_SUCCESS;
1723 1719          }
1724 1720          return (rv);
1725 1721  }
1726 1722  
1727 1723  /*
1728 1724   * mdi_Set_lb_policy():
1729 1725   *              Set current load balancing policy for a given client device
1730 1726   */
1731 1727  int
1732 1728  mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1733 1729  {
1734 1730          mdi_client_t    *ct;
1735 1731          int             rv = MDI_FAILURE;
1736 1732  
1737 1733          ct = i_devi_get_client(cdip);
1738 1734          if (ct != NULL) {
1739 1735                  ct->ct_lb = lb;
1740 1736                  rv = MDI_SUCCESS;
1741 1737          }
1742 1738          return (rv);
1743 1739  }
1744 1740  
1745 1741  /*
1746 1742   * mdi_failover():
1747 1743   *              failover function called by the vHCI drivers to initiate
1748 1744   *              a failover operation.  This is typically due to non-availability
1749 1745   *              of online paths to route I/O requests.  Failover can be
1750 1746   *              triggered through user application also.
1751 1747   *
1752 1748   *              The vHCI driver calls mdi_failover() to initiate a failover
1753 1749   *              operation. mdi_failover() calls back into the vHCI driver's
1754 1750   *              vo_failover() entry point to perform the actual failover
1755 1751   *              operation.  The reason for requiring the vHCI driver to
1756 1752   *              initiate failover by calling mdi_failover(), instead of directly
1757 1753   *              executing vo_failover() itself, is to ensure that the mdi
1758 1754   *              framework can keep track of the client state properly.
1759 1755   *              Additionally, mdi_failover() provides as a convenience the
1760 1756   *              option of performing the failover operation synchronously or
1761 1757   *              asynchronously
1762 1758   *
1763 1759   *              Upon successful completion of the failover operation, the
1764 1760   *              paths that were previously ONLINE will be in the STANDBY state,
1765 1761   *              and the newly activated paths will be in the ONLINE state.
1766 1762   *
1767 1763   *              The flags modifier determines whether the activation is done
1768 1764   *              synchronously: MDI_FAILOVER_SYNC
1769 1765   * Return Values:
1770 1766   *              MDI_SUCCESS
1771 1767   *              MDI_FAILURE
1772 1768   *              MDI_BUSY
1773 1769   */
1774 1770  /*ARGSUSED*/
1775 1771  int
1776 1772  mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1777 1773  {
1778 1774          int                     rv;
1779 1775          mdi_client_t            *ct;
1780 1776  
1781 1777          ct = i_devi_get_client(cdip);
1782 1778          ASSERT(ct != NULL);
1783 1779          if (ct == NULL) {
1784 1780                  /* cdip is not a valid client device. Nothing more to do. */
1785 1781                  return (MDI_FAILURE);
1786 1782          }
1787 1783  
1788 1784          MDI_CLIENT_LOCK(ct);
1789 1785  
1790 1786          if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1791 1787                  /* A path to the client is being freed */
1792 1788                  MDI_CLIENT_UNLOCK(ct);
1793 1789                  return (MDI_BUSY);
1794 1790          }
1795 1791  
1796 1792  
1797 1793          if (MDI_CLIENT_IS_FAILED(ct)) {
1798 1794                  /*
1799 1795                   * Client is in failed state. Nothing more to do.
1800 1796                   */
1801 1797                  MDI_CLIENT_UNLOCK(ct);
1802 1798                  return (MDI_FAILURE);
1803 1799          }
1804 1800  
1805 1801          if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1806 1802                  /*
1807 1803                   * Failover is already in progress; return BUSY
1808 1804                   */
1809 1805                  MDI_CLIENT_UNLOCK(ct);
1810 1806                  return (MDI_BUSY);
1811 1807          }
1812 1808          /*
1813 1809           * Make sure that mdi_pathinfo node state changes are processed.
1814 1810           * We do not allow failovers to progress while client path state
1815 1811           * changes are in progress
1816 1812           */
1817 1813          if (ct->ct_unstable) {
1818 1814                  if (flags == MDI_FAILOVER_ASYNC) {
1819 1815                          MDI_CLIENT_UNLOCK(ct);
1820 1816                          return (MDI_BUSY);
1821 1817                  } else {
1822 1818                          while (ct->ct_unstable)
1823 1819                                  cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1824 1820                  }
1825 1821          }
1826 1822  
1827 1823          /*
1828 1824           * Client device is in stable state. Before proceeding, perform sanity
1829 1825           * checks again.
1830 1826           */
1831 1827          if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1832 1828              (!i_ddi_devi_attached(cdip))) {
1833 1829                  /*
1834 1830                   * Client is in failed state. Nothing more to do.
1835 1831                   */
1836 1832                  MDI_CLIENT_UNLOCK(ct);
1837 1833                  return (MDI_FAILURE);
1838 1834          }
1839 1835  
1840 1836          /*
1841 1837           * Set the client state as failover in progress.
1842 1838           */
1843 1839          MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1844 1840          ct->ct_failover_flags = flags;
1845 1841          MDI_CLIENT_UNLOCK(ct);
1846 1842  
1847 1843          if (flags == MDI_FAILOVER_ASYNC) {
1848 1844                  /*
1849 1845                   * Submit the initiate failover request via CPR safe
1850 1846                   * taskq threads.
1851 1847                   */
1852 1848                  (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1853 1849                      ct, KM_SLEEP);
1854 1850                  return (MDI_ACCEPT);
1855 1851          } else {
1856 1852                  /*
1857 1853                   * Synchronous failover mode.  Typically invoked from the user
1858 1854                   * land.
1859 1855                   */
1860 1856                  rv = i_mdi_failover(ct);
1861 1857          }
1862 1858          return (rv);
1863 1859  }
1864 1860  
1865 1861  /*
1866 1862   * i_mdi_failover():
1867 1863   *              internal failover function. Invokes vHCI drivers failover
1868 1864   *              callback function and process the failover status
1869 1865   * Return Values:
1870 1866   *              None
1871 1867   *
1872 1868   * Note: A client device in failover state can not be detached or freed.
1873 1869   */
1874 1870  static int
1875 1871  i_mdi_failover(void *arg)
1876 1872  {
1877 1873          int             rv = MDI_SUCCESS;
1878 1874          mdi_client_t    *ct = (mdi_client_t *)arg;
1879 1875          mdi_vhci_t      *vh = ct->ct_vhci;
1880 1876  
1881 1877          ASSERT(!MDI_CLIENT_LOCKED(ct));
1882 1878  
1883 1879          if (vh->vh_ops->vo_failover != NULL) {
1884 1880                  /*
1885 1881                   * Call vHCI drivers callback routine
1886 1882                   */
1887 1883                  rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1888 1884                      ct->ct_failover_flags);
1889 1885          }
1890 1886  
1891 1887          MDI_CLIENT_LOCK(ct);
1892 1888          MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1893 1889  
1894 1890          /*
1895 1891           * Save the failover return status
1896 1892           */
1897 1893          ct->ct_failover_status = rv;
1898 1894  
1899 1895          /*
1900 1896           * As a result of failover, client status would have been changed.
1901 1897           * Update the client state and wake up anyone waiting on this client
1902 1898           * device.
1903 1899           */
1904 1900          i_mdi_client_update_state(ct);
1905 1901  
1906 1902          cv_broadcast(&ct->ct_failover_cv);
1907 1903          MDI_CLIENT_UNLOCK(ct);
1908 1904          return (rv);
1909 1905  }
1910 1906  
1911 1907  /*
1912 1908   * Load balancing is logical block.
1913 1909   * IOs within the range described by region_size
1914 1910   * would go on the same path. This would improve the
1915 1911   * performance by cache-hit on some of the RAID devices.
1916 1912   * Search only for online paths(At some point we
1917 1913   * may want to balance across target ports).
1918 1914   * If no paths are found then default to round-robin.
1919 1915   */
1920 1916  static int
1921 1917  i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1922 1918  {
1923 1919          int             path_index = -1;
1924 1920          int             online_path_count = 0;
1925 1921          int             online_nonpref_path_count = 0;
1926 1922          int             region_size = ct->ct_lb_args->region_size;
1927 1923          mdi_pathinfo_t  *pip;
1928 1924          mdi_pathinfo_t  *next;
1929 1925          int             preferred, path_cnt;
1930 1926  
1931 1927          pip = ct->ct_path_head;
1932 1928          while (pip) {
1933 1929                  MDI_PI_LOCK(pip);
1934 1930                  if (MDI_PI(pip)->pi_state ==
1935 1931                      MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1936 1932                          online_path_count++;
1937 1933                  } else if (MDI_PI(pip)->pi_state ==
1938 1934                      MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1939 1935                          online_nonpref_path_count++;
1940 1936                  }
1941 1937                  next = (mdi_pathinfo_t *)
1942 1938                      MDI_PI(pip)->pi_client_link;
1943 1939                  MDI_PI_UNLOCK(pip);
1944 1940                  pip = next;
1945 1941          }
1946 1942          /* if found any online/preferred then use this type */
1947 1943          if (online_path_count > 0) {
1948 1944                  path_cnt = online_path_count;
1949 1945                  preferred = 1;
1950 1946          } else if (online_nonpref_path_count > 0) {
1951 1947                  path_cnt = online_nonpref_path_count;
1952 1948                  preferred = 0;
1953 1949          } else {
1954 1950                  path_cnt = 0;
1955 1951          }
1956 1952          if (path_cnt) {
1957 1953                  path_index = (bp->b_blkno >> region_size) % path_cnt;
1958 1954                  pip = ct->ct_path_head;
1959 1955                  while (pip && path_index != -1) {
1960 1956                          MDI_PI_LOCK(pip);
1961 1957                          if (path_index == 0 &&
1962 1958                              (MDI_PI(pip)->pi_state ==
1963 1959                              MDI_PATHINFO_STATE_ONLINE) &&
1964 1960                                  MDI_PI(pip)->pi_preferred == preferred) {
1965 1961                                  MDI_PI_HOLD(pip);
1966 1962                                  MDI_PI_UNLOCK(pip);
1967 1963                                  *ret_pip = pip;
1968 1964                                  return (MDI_SUCCESS);
1969 1965                          }
1970 1966                          path_index --;
1971 1967                          next = (mdi_pathinfo_t *)
1972 1968                              MDI_PI(pip)->pi_client_link;
1973 1969                          MDI_PI_UNLOCK(pip);
1974 1970                          pip = next;
1975 1971                  }
1976 1972                  MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1977 1973                      "lba %llx: path %s %p",
1978 1974                      bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1979 1975          }
1980 1976          return (MDI_FAILURE);
1981 1977  }
1982 1978  
1983 1979  /*
1984 1980   * mdi_select_path():
1985 1981   *              select a path to access a client device.
1986 1982   *
1987 1983   *              mdi_select_path() function is called by the vHCI drivers to
1988 1984   *              select a path to route the I/O request to.  The caller passes
1989 1985   *              the block I/O data transfer structure ("buf") as one of the
1990 1986   *              parameters.  The mpxio framework uses the buf structure
1991 1987   *              contents to maintain per path statistics (total I/O size /
1992 1988   *              count pending).  If more than one online paths are available to
1993 1989   *              select, the framework automatically selects a suitable path
1994 1990   *              for routing I/O request. If a failover operation is active for
1995 1991   *              this client device the call shall be failed with MDI_BUSY error
1996 1992   *              code.
1997 1993   *
1998 1994   *              By default this function returns a suitable path in online
1999 1995   *              state based on the current load balancing policy.  Currently
2000 1996   *              we support LOAD_BALANCE_NONE (Previously selected online path
2001 1997   *              will continue to be used till the path is usable) and
2002 1998   *              LOAD_BALANCE_RR (Online paths will be selected in a round
2003 1999   *              robin fashion), LOAD_BALANCE_LB(Online paths will be selected
2004 2000   *              based on the logical block).  The load balancing policy is
2005 2001   *              set through the vHCI driver's configuration file (driver.conf).
2006 2002   *
2007 2003   *              vHCI drivers may override this default behavior by specifying
2008 2004   *              appropriate flags.  The meaning of the third argument depends
2009 2005   *              on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2010 2006   *              then the argument is the "path instance" of the path to select.
2011 2007   *              If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2012 2008   *              "start_pip". A non NULL "start_pip" is the starting point to
2013 2009   *              walk and find the next appropriate path.  The following values
2014 2010   *              are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2015 2011   *              ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2016 2012   *              STANDBY path).
2017 2013   *
2018 2014   *              The non-standard behavior is used by the scsi_vhci driver,
2019 2015   *              whenever it has to use a STANDBY/FAULTED path.  Eg. during
2020 2016   *              attach of client devices (to avoid an unnecessary failover
2021 2017   *              when the STANDBY path comes up first), during failover
2022 2018   *              (to activate a STANDBY path as ONLINE).
2023 2019   *
2024 2020   *              The selected path is returned in a mdi_hold_path() state
2025 2021   *              (pi_ref_cnt). Caller should release the hold by calling
2026 2022   *              mdi_rele_path().
2027 2023   *
2028 2024   * Return Values:
2029 2025   *              MDI_SUCCESS     - Completed successfully
2030 2026   *              MDI_BUSY        - Client device is busy failing over
2031 2027   *              MDI_NOPATH      - Client device is online, but no valid path are
2032 2028   *                                available to access this client device
2033 2029   *              MDI_FAILURE     - Invalid client device or state
2034 2030   *              MDI_DEVI_ONLINING
2035 2031   *                              - Client device (struct dev_info state) is in
2036 2032   *                                onlining state.
2037 2033   */
2038 2034  
2039 2035  /*ARGSUSED*/
2040 2036  int
2041 2037  mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2042 2038      void *arg, mdi_pathinfo_t **ret_pip)
2043 2039  {
2044 2040          mdi_client_t    *ct;
2045 2041          mdi_pathinfo_t  *pip;
2046 2042          mdi_pathinfo_t  *next;
2047 2043          mdi_pathinfo_t  *head;
2048 2044          mdi_pathinfo_t  *start;
2049 2045          client_lb_t     lbp;    /* load balancing policy */
2050 2046          int             sb = 1; /* standard behavior */
2051 2047          int             preferred = 1;  /* preferred path */
2052 2048          int             cond, cont = 1;
2053 2049          int             retry = 0;
2054 2050          mdi_pathinfo_t  *start_pip;     /* request starting pathinfo */
2055 2051          int             path_instance;  /* request specific path instance */
2056 2052  
2057 2053          /* determine type of arg based on flags */
2058 2054          if (flags & MDI_SELECT_PATH_INSTANCE) {
2059 2055                  path_instance = (int)(intptr_t)arg;
2060 2056                  start_pip = NULL;
2061 2057          } else {
2062 2058                  path_instance = 0;
2063 2059                  start_pip = (mdi_pathinfo_t *)arg;
2064 2060          }
2065 2061  
2066 2062          if (flags != 0) {
2067 2063                  /*
2068 2064                   * disable default behavior
2069 2065                   */
2070 2066                  sb = 0;
2071 2067          }
2072 2068  
2073 2069          *ret_pip = NULL;
2074 2070          ct = i_devi_get_client(cdip);
2075 2071          if (ct == NULL) {
2076 2072                  /* mdi extensions are NULL, Nothing more to do */
2077 2073                  return (MDI_FAILURE);
2078 2074          }
2079 2075  
2080 2076          MDI_CLIENT_LOCK(ct);
2081 2077  
2082 2078          if (sb) {
2083 2079                  if (MDI_CLIENT_IS_FAILED(ct)) {
2084 2080                          /*
2085 2081                           * Client is not ready to accept any I/O requests.
2086 2082                           * Fail this request.
2087 2083                           */
2088 2084                          MDI_DEBUG(2, (MDI_NOTE, cdip,
2089 2085                              "client state offline ct = %p", (void *)ct));
2090 2086                          MDI_CLIENT_UNLOCK(ct);
2091 2087                          return (MDI_FAILURE);
2092 2088                  }
2093 2089  
2094 2090                  if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2095 2091                          /*
2096 2092                           * Check for Failover is in progress. If so tell the
2097 2093                           * caller that this device is busy.
2098 2094                           */
2099 2095                          MDI_DEBUG(2, (MDI_NOTE, cdip,
2100 2096                              "client failover in progress ct = %p",
2101 2097                              (void *)ct));
2102 2098                          MDI_CLIENT_UNLOCK(ct);
2103 2099                          return (MDI_BUSY);
2104 2100                  }
2105 2101  
2106 2102                  /*
2107 2103                   * Check to see whether the client device is attached.
2108 2104                   * If not so, let the vHCI driver manually select a path
2109 2105                   * (standby) and let the probe/attach process to continue.
2110 2106                   */
2111 2107                  if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2112 2108                          MDI_DEBUG(4, (MDI_NOTE, cdip,
2113 2109                              "devi is onlining ct = %p", (void *)ct));
2114 2110                          MDI_CLIENT_UNLOCK(ct);
2115 2111                          return (MDI_DEVI_ONLINING);
2116 2112                  }
2117 2113          }
2118 2114  
2119 2115          /*
2120 2116           * Cache in the client list head.  If head of the list is NULL
2121 2117           * return MDI_NOPATH
2122 2118           */
2123 2119          head = ct->ct_path_head;
2124 2120          if (head == NULL) {
2125 2121                  MDI_CLIENT_UNLOCK(ct);
2126 2122                  return (MDI_NOPATH);
2127 2123          }
2128 2124  
2129 2125          /* Caller is specifying a specific pathinfo path by path_instance */
2130 2126          if (path_instance) {
2131 2127                  /* search for pathinfo with correct path_instance */
2132 2128                  for (pip = head;
2133 2129                      pip && (mdi_pi_get_path_instance(pip) != path_instance);
2134 2130                      pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2135 2131                          ;
2136 2132  
2137 2133                  /* If path can't be selected then MDI_NOPATH is returned. */
2138 2134                  if (pip == NULL) {
2139 2135                          MDI_CLIENT_UNLOCK(ct);
2140 2136                          return (MDI_NOPATH);
2141 2137                  }
2142 2138  
2143 2139                  /*
2144 2140                   * Verify state of path. When asked to select a specific
2145 2141                   * path_instance, we select the requested path in any
2146 2142                   * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2147 2143                   * We don't however select paths where the pHCI has detached.
2148 2144                   * NOTE: last pathinfo node of an opened client device may
2149 2145                   * exist in an OFFLINE state after the pHCI associated with
2150 2146                   * that path has detached (but pi_phci will be NULL if that
2151 2147                   * has occurred).
2152 2148                   */
2153 2149                  MDI_PI_LOCK(pip);
2154 2150                  if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2155 2151                      (MDI_PI(pip)->pi_phci == NULL)) {
2156 2152                          MDI_PI_UNLOCK(pip);
2157 2153                          MDI_CLIENT_UNLOCK(ct);
2158 2154                          return (MDI_FAILURE);
2159 2155                  }
2160 2156  
2161 2157                  /* Return MDI_BUSY if we have a transient condition */
2162 2158                  if (MDI_PI_IS_TRANSIENT(pip)) {
2163 2159                          MDI_PI_UNLOCK(pip);
2164 2160                          MDI_CLIENT_UNLOCK(ct);
2165 2161                          return (MDI_BUSY);
2166 2162                  }
2167 2163  
2168 2164                  /*
2169 2165                   * Return the path in hold state. Caller should release the
2170 2166                   * lock by calling mdi_rele_path()
2171 2167                   */
2172 2168                  MDI_PI_HOLD(pip);
2173 2169                  MDI_PI_UNLOCK(pip);
2174 2170                  *ret_pip = pip;
2175 2171                  MDI_CLIENT_UNLOCK(ct);
2176 2172                  return (MDI_SUCCESS);
2177 2173          }
2178 2174  
2179 2175          /*
2180 2176           * for non default behavior, bypass current
2181 2177           * load balancing policy and always use LOAD_BALANCE_RR
2182 2178           * except that the start point will be adjusted based
2183 2179           * on the provided start_pip
2184 2180           */
2185 2181          lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2186 2182  
2187 2183          switch (lbp) {
2188 2184          case LOAD_BALANCE_NONE:
2189 2185                  /*
2190 2186                   * Load balancing is None  or Alternate path mode
2191 2187                   * Start looking for a online mdi_pathinfo node starting from
2192 2188                   * last known selected path
2193 2189                   */
2194 2190                  preferred = 1;
2195 2191                  pip = (mdi_pathinfo_t *)ct->ct_path_last;
2196 2192                  if (pip == NULL) {
2197 2193                          pip = head;
2198 2194                  }
2199 2195                  start = pip;
2200 2196                  do {
2201 2197                          MDI_PI_LOCK(pip);
2202 2198                          /*
2203 2199                           * No need to explicitly check if the path is disabled.
2204 2200                           * Since we are checking for state == ONLINE and the
2205 2201                           * same variable is used for DISABLE/ENABLE information.
2206 2202                           */
2207 2203                          if ((MDI_PI(pip)->pi_state  ==
2208 2204                                  MDI_PATHINFO_STATE_ONLINE) &&
2209 2205                                  preferred == MDI_PI(pip)->pi_preferred) {
2210 2206                                  /*
2211 2207                                   * Return the path in hold state. Caller should
2212 2208                                   * release the lock by calling mdi_rele_path()
2213 2209                                   */
2214 2210                                  MDI_PI_HOLD(pip);
2215 2211                                  MDI_PI_UNLOCK(pip);
2216 2212                                  ct->ct_path_last = pip;
2217 2213                                  *ret_pip = pip;
2218 2214                                  MDI_CLIENT_UNLOCK(ct);
2219 2215                                  return (MDI_SUCCESS);
2220 2216                          }
2221 2217  
2222 2218                          /*
2223 2219                           * Path is busy.
2224 2220                           */
2225 2221                          if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2226 2222                              MDI_PI_IS_TRANSIENT(pip))
2227 2223                                  retry = 1;
2228 2224                          /*
2229 2225                           * Keep looking for a next available online path
2230 2226                           */
2231 2227                          next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2232 2228                          if (next == NULL) {
2233 2229                                  next = head;
2234 2230                          }
2235 2231                          MDI_PI_UNLOCK(pip);
2236 2232                          pip = next;
2237 2233                          if (start == pip && preferred) {
2238 2234                                  preferred = 0;
2239 2235                          } else if (start == pip && !preferred) {
2240 2236                                  cont = 0;
2241 2237                          }
2242 2238                  } while (cont);
2243 2239                  break;
2244 2240  
2245 2241          case LOAD_BALANCE_LBA:
2246 2242                  /*
2247 2243                   * Make sure we are looking
2248 2244                   * for an online path. Otherwise, if it is for a STANDBY
2249 2245                   * path request, it will go through and fetch an ONLINE
2250 2246                   * path which is not desirable.
2251 2247                   */
2252 2248                  if ((ct->ct_lb_args != NULL) &&
2253 2249                              (ct->ct_lb_args->region_size) && bp &&
2254 2250                                  (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2255 2251                          if (i_mdi_lba_lb(ct, ret_pip, bp)
2256 2252                                      == MDI_SUCCESS) {
2257 2253                                  MDI_CLIENT_UNLOCK(ct);
2258 2254                                  return (MDI_SUCCESS);
2259 2255                          }
2260 2256                  }
2261 2257                  /* FALLTHROUGH */
2262 2258          case LOAD_BALANCE_RR:
2263 2259                  /*
2264 2260                   * Load balancing is Round Robin. Start looking for a online
2265 2261                   * mdi_pathinfo node starting from last known selected path
2266 2262                   * as the start point.  If override flags are specified,
2267 2263                   * process accordingly.
2268 2264                   * If the search is already in effect(start_pip not null),
2269 2265                   * then lets just use the same path preference to continue the
2270 2266                   * traversal.
2271 2267                   */
2272 2268  
2273 2269                  if (start_pip != NULL) {
2274 2270                          preferred = MDI_PI(start_pip)->pi_preferred;
2275 2271                  } else {
2276 2272                          preferred = 1;
2277 2273                  }
2278 2274  
2279 2275                  start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2280 2276                  if (start == NULL) {
2281 2277                          pip = head;
2282 2278                  } else {
2283 2279                          pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2284 2280                          if (pip == NULL) {
2285 2281                                  if ( flags & MDI_SELECT_NO_PREFERRED) {
2286 2282                                          /*
2287 2283                                           * Return since we hit the end of list
2288 2284                                           */
2289 2285                                          MDI_CLIENT_UNLOCK(ct);
2290 2286                                          return (MDI_NOPATH);
2291 2287                                  }
2292 2288  
2293 2289                                  if (!sb) {
2294 2290                                          if (preferred == 0) {
2295 2291                                                  /*
2296 2292                                                   * Looks like we have completed
2297 2293                                                   * the traversal as preferred
2298 2294                                                   * value is 0. Time to bail out.
2299 2295                                                   */
2300 2296                                                  *ret_pip = NULL;
2301 2297                                                  MDI_CLIENT_UNLOCK(ct);
2302 2298                                                  return (MDI_NOPATH);
2303 2299                                          } else {
2304 2300                                                  /*
2305 2301                                                   * Looks like we reached the
2306 2302                                                   * end of the list. Lets enable
2307 2303                                                   * traversal of non preferred
2308 2304                                                   * paths.
2309 2305                                                   */
2310 2306                                                  preferred = 0;
2311 2307                                          }
2312 2308                                  }
2313 2309                                  pip = head;
2314 2310                          }
2315 2311                  }
2316 2312                  start = pip;
2317 2313                  do {
2318 2314                          MDI_PI_LOCK(pip);
2319 2315                          if (sb) {
2320 2316                                  cond = ((MDI_PI(pip)->pi_state ==
2321 2317                                      MDI_PATHINFO_STATE_ONLINE &&
2322 2318                                          MDI_PI(pip)->pi_preferred ==
2323 2319                                                  preferred) ? 1 : 0);
2324 2320                          } else {
2325 2321                                  if (flags == MDI_SELECT_ONLINE_PATH) {
2326 2322                                          cond = ((MDI_PI(pip)->pi_state ==
2327 2323                                              MDI_PATHINFO_STATE_ONLINE &&
2328 2324                                                  MDI_PI(pip)->pi_preferred ==
2329 2325                                                  preferred) ? 1 : 0);
2330 2326                                  } else if (flags == MDI_SELECT_STANDBY_PATH) {
2331 2327                                          cond = ((MDI_PI(pip)->pi_state ==
2332 2328                                              MDI_PATHINFO_STATE_STANDBY &&
2333 2329                                                  MDI_PI(pip)->pi_preferred ==
2334 2330                                                  preferred) ? 1 : 0);
2335 2331                                  } else if (flags == (MDI_SELECT_ONLINE_PATH |
2336 2332                                      MDI_SELECT_STANDBY_PATH)) {
2337 2333                                          cond = (((MDI_PI(pip)->pi_state ==
2338 2334                                              MDI_PATHINFO_STATE_ONLINE ||
2339 2335                                              (MDI_PI(pip)->pi_state ==
2340 2336                                              MDI_PATHINFO_STATE_STANDBY)) &&
2341 2337                                                  MDI_PI(pip)->pi_preferred ==
2342 2338                                                  preferred) ? 1 : 0);
2343 2339                                  } else if (flags ==
2344 2340                                          (MDI_SELECT_STANDBY_PATH |
2345 2341                                          MDI_SELECT_ONLINE_PATH |
2346 2342                                          MDI_SELECT_USER_DISABLE_PATH)) {
2347 2343                                          cond = (((MDI_PI(pip)->pi_state ==
2348 2344                                              MDI_PATHINFO_STATE_ONLINE ||
2349 2345                                              (MDI_PI(pip)->pi_state ==
2350 2346                                              MDI_PATHINFO_STATE_STANDBY) ||
2351 2347                                                  (MDI_PI(pip)->pi_state ==
2352 2348                                              (MDI_PATHINFO_STATE_ONLINE|
2353 2349                                              MDI_PATHINFO_STATE_USER_DISABLE)) ||
2354 2350                                                  (MDI_PI(pip)->pi_state ==
2355 2351                                              (MDI_PATHINFO_STATE_STANDBY |
2356 2352                                              MDI_PATHINFO_STATE_USER_DISABLE)))&&
2357 2353                                                  MDI_PI(pip)->pi_preferred ==
2358 2354                                                  preferred) ? 1 : 0);
2359 2355                                  } else if (flags ==
2360 2356                                      (MDI_SELECT_STANDBY_PATH |
2361 2357                                      MDI_SELECT_ONLINE_PATH |
2362 2358                                      MDI_SELECT_NO_PREFERRED)) {
2363 2359                                          cond = (((MDI_PI(pip)->pi_state ==
2364 2360                                              MDI_PATHINFO_STATE_ONLINE) ||
2365 2361                                              (MDI_PI(pip)->pi_state ==
2366 2362                                              MDI_PATHINFO_STATE_STANDBY))
2367 2363                                              ? 1 : 0);
2368 2364                                  } else {
2369 2365                                          cond = 0;
2370 2366                                  }
2371 2367                          }
2372 2368                          /*
2373 2369                           * No need to explicitly check if the path is disabled.
2374 2370                           * Since we are checking for state == ONLINE and the
2375 2371                           * same variable is used for DISABLE/ENABLE information.
2376 2372                           */
2377 2373                          if (cond) {
2378 2374                                  /*
2379 2375                                   * Return the path in hold state. Caller should
2380 2376                                   * release the lock by calling mdi_rele_path()
2381 2377                                   */
2382 2378                                  MDI_PI_HOLD(pip);
2383 2379                                  MDI_PI_UNLOCK(pip);
2384 2380                                  if (sb)
2385 2381                                          ct->ct_path_last = pip;
2386 2382                                  *ret_pip = pip;
2387 2383                                  MDI_CLIENT_UNLOCK(ct);
2388 2384                                  return (MDI_SUCCESS);
2389 2385                          }
2390 2386                          /*
2391 2387                           * Path is busy.
2392 2388                           */
2393 2389                          if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2394 2390                              MDI_PI_IS_TRANSIENT(pip))
2395 2391                                  retry = 1;
2396 2392  
2397 2393                          /*
2398 2394                           * Keep looking for a next available online path
2399 2395                           */
2400 2396  do_again:
2401 2397                          next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2402 2398                          if (next == NULL) {
2403 2399                                  if ( flags & MDI_SELECT_NO_PREFERRED) {
2404 2400                                          /*
2405 2401                                           * Bail out since we hit the end of list
2406 2402                                           */
2407 2403                                          MDI_PI_UNLOCK(pip);
2408 2404                                          break;
2409 2405                                  }
2410 2406  
2411 2407                                  if (!sb) {
2412 2408                                          if (preferred == 1) {
2413 2409                                                  /*
2414 2410                                                   * Looks like we reached the
2415 2411                                                   * end of the list. Lets enable
2416 2412                                                   * traversal of non preferred
2417 2413                                                   * paths.
2418 2414                                                   */
2419 2415                                                  preferred = 0;
2420 2416                                                  next = head;
2421 2417                                          } else {
2422 2418                                                  /*
2423 2419                                                   * We have done both the passes
2424 2420                                                   * Preferred as well as for
2425 2421                                                   * Non-preferred. Bail out now.
2426 2422                                                   */
2427 2423                                                  cont = 0;
2428 2424                                          }
2429 2425                                  } else {
2430 2426                                          /*
2431 2427                                           * Standard behavior case.
2432 2428                                           */
2433 2429                                          next = head;
2434 2430                                  }
2435 2431                          }
2436 2432                          MDI_PI_UNLOCK(pip);
2437 2433                          if (cont == 0) {
2438 2434                                  break;
2439 2435                          }
2440 2436                          pip = next;
2441 2437  
2442 2438                          if (!sb) {
2443 2439                                  /*
2444 2440                                   * We need to handle the selection of
2445 2441                                   * non-preferred path in the following
2446 2442                                   * case:
2447 2443                                   *
2448 2444                                   * +------+   +------+   +------+   +-----+
2449 2445                                   * | A : 1| - | B : 1| - | C : 0| - |NULL |
2450 2446                                   * +------+   +------+   +------+   +-----+
2451 2447                                   *
2452 2448                                   * If we start the search with B, we need to
2453 2449                                   * skip beyond B to pick C which is non -
2454 2450                                   * preferred in the second pass. The following
2455 2451                                   * test, if true, will allow us to skip over
2456 2452                                   * the 'start'(B in the example) to select
2457 2453                                   * other non preferred elements.
2458 2454                                   */
2459 2455                                  if ((start_pip != NULL) && (start_pip == pip) &&
2460 2456                                      (MDI_PI(start_pip)->pi_preferred
2461 2457                                      != preferred)) {
2462 2458                                          /*
2463 2459                                           * try again after going past the start
2464 2460                                           * pip
2465 2461                                           */
2466 2462                                          MDI_PI_LOCK(pip);
2467 2463                                          goto do_again;
2468 2464                                  }
2469 2465                          } else {
2470 2466                                  /*
2471 2467                                   * Standard behavior case
2472 2468                                   */
2473 2469                                  if (start == pip && preferred) {
2474 2470                                          /* look for nonpreferred paths */
2475 2471                                          preferred = 0;
2476 2472                                  } else if (start == pip && !preferred) {
2477 2473                                          /*
2478 2474                                           * Exit condition
2479 2475                                           */
2480 2476                                          cont = 0;
2481 2477                                  }
2482 2478                          }
2483 2479                  } while (cont);
2484 2480                  break;
2485 2481          }
2486 2482  
2487 2483          MDI_CLIENT_UNLOCK(ct);
2488 2484          if (retry == 1) {
2489 2485                  return (MDI_BUSY);
2490 2486          } else {
2491 2487                  return (MDI_NOPATH);
2492 2488          }
2493 2489  }
2494 2490  
2495 2491  /*
2496 2492   * For a client, return the next available path to any phci
2497 2493   *
2498 2494   * Note:
2499 2495   *              Caller should hold the branch's devinfo node to get a consistent
2500 2496   *              snap shot of the mdi_pathinfo nodes.
2501 2497   *
2502 2498   *              Please note that even the list is stable the mdi_pathinfo
2503 2499   *              node state and properties are volatile.  The caller should lock
2504 2500   *              and unlock the nodes by calling mdi_pi_lock() and
2505 2501   *              mdi_pi_unlock() functions to get a stable properties.
2506 2502   *
2507 2503   *              If there is a need to use the nodes beyond the hold of the
2508 2504   *              devinfo node period (For ex. I/O), then mdi_pathinfo node
2509 2505   *              need to be held against unexpected removal by calling
2510 2506   *              mdi_hold_path() and should be released by calling
2511 2507   *              mdi_rele_path() on completion.
2512 2508   */
2513 2509  mdi_pathinfo_t *
2514 2510  mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2515 2511  {
2516 2512          mdi_client_t *ct;
2517 2513  
2518 2514          if (!MDI_CLIENT(ct_dip))
2519 2515                  return (NULL);
2520 2516  
2521 2517          /*
2522 2518           * Walk through client link
2523 2519           */
2524 2520          ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2525 2521          ASSERT(ct != NULL);
2526 2522  
2527 2523          if (pip == NULL)
2528 2524                  return ((mdi_pathinfo_t *)ct->ct_path_head);
2529 2525  
2530 2526          return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2531 2527  }
2532 2528  
2533 2529  /*
2534 2530   * For a phci, return the next available path to any client
2535 2531   * Note: ditto mdi_get_next_phci_path()
2536 2532   */
2537 2533  mdi_pathinfo_t *
2538 2534  mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2539 2535  {
2540 2536          mdi_phci_t *ph;
2541 2537  
2542 2538          if (!MDI_PHCI(ph_dip))
2543 2539                  return (NULL);
2544 2540  
2545 2541          /*
2546 2542           * Walk through pHCI link
2547 2543           */
2548 2544          ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2549 2545          ASSERT(ph != NULL);
2550 2546  
2551 2547          if (pip == NULL)
2552 2548                  return ((mdi_pathinfo_t *)ph->ph_path_head);
2553 2549  
2554 2550          return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2555 2551  }
2556 2552  
2557 2553  /*
2558 2554   * mdi_hold_path():
2559 2555   *              Hold the mdi_pathinfo node against unwanted unexpected free.
2560 2556   * Return Values:
2561 2557   *              None
2562 2558   */
2563 2559  void
2564 2560  mdi_hold_path(mdi_pathinfo_t *pip)
2565 2561  {
2566 2562          if (pip) {
2567 2563                  MDI_PI_LOCK(pip);
2568 2564                  MDI_PI_HOLD(pip);
2569 2565                  MDI_PI_UNLOCK(pip);
2570 2566          }
2571 2567  }
2572 2568  
2573 2569  
2574 2570  /*
2575 2571   * mdi_rele_path():
2576 2572   *              Release the mdi_pathinfo node which was selected
2577 2573   *              through mdi_select_path() mechanism or manually held by
2578 2574   *              calling mdi_hold_path().
2579 2575   * Return Values:
2580 2576   *              None
2581 2577   */
2582 2578  void
2583 2579  mdi_rele_path(mdi_pathinfo_t *pip)
2584 2580  {
2585 2581          if (pip) {
2586 2582                  MDI_PI_LOCK(pip);
2587 2583                  MDI_PI_RELE(pip);
2588 2584                  if (MDI_PI(pip)->pi_ref_cnt == 0) {
2589 2585                          cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2590 2586                  }
2591 2587                  MDI_PI_UNLOCK(pip);
2592 2588          }
2593 2589  }
2594 2590  
2595 2591  /*
2596 2592   * mdi_pi_lock():
2597 2593   *              Lock the mdi_pathinfo node.
2598 2594   * Note:
2599 2595   *              The caller should release the lock by calling mdi_pi_unlock()
2600 2596   */
2601 2597  void
2602 2598  mdi_pi_lock(mdi_pathinfo_t *pip)
2603 2599  {
2604 2600          ASSERT(pip != NULL);
2605 2601          if (pip) {
2606 2602                  MDI_PI_LOCK(pip);
2607 2603          }
2608 2604  }
2609 2605  
2610 2606  
2611 2607  /*
2612 2608   * mdi_pi_unlock():
2613 2609   *              Unlock the mdi_pathinfo node.
2614 2610   * Note:
2615 2611   *              The mdi_pathinfo node should have been locked with mdi_pi_lock()
2616 2612   */
2617 2613  void
2618 2614  mdi_pi_unlock(mdi_pathinfo_t *pip)
2619 2615  {
2620 2616          ASSERT(pip != NULL);
2621 2617          if (pip) {
2622 2618                  MDI_PI_UNLOCK(pip);
2623 2619          }
2624 2620  }
2625 2621  
2626 2622  /*
2627 2623   * mdi_pi_find():
2628 2624   *              Search the list of mdi_pathinfo nodes attached to the
2629 2625   *              pHCI/Client device node whose path address matches "paddr".
2630 2626   *              Returns a pointer to the mdi_pathinfo node if a matching node is
2631 2627   *              found.
2632 2628   * Return Values:
2633 2629   *              mdi_pathinfo node handle
2634 2630   *              NULL
2635 2631   * Notes:
2636 2632   *              Caller need not hold any locks to call this function.
2637 2633   */
2638 2634  mdi_pathinfo_t *
2639 2635  mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2640 2636  {
2641 2637          mdi_phci_t              *ph;
2642 2638          mdi_vhci_t              *vh;
2643 2639          mdi_client_t            *ct;
2644 2640          mdi_pathinfo_t          *pip = NULL;
2645 2641  
2646 2642          MDI_DEBUG(2, (MDI_NOTE, pdip,
2647 2643              "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2648 2644          if ((pdip == NULL) || (paddr == NULL)) {
2649 2645                  return (NULL);
2650 2646          }
2651 2647          ph = i_devi_get_phci(pdip);
2652 2648          if (ph == NULL) {
2653 2649                  /*
2654 2650                   * Invalid pHCI device, Nothing more to do.
2655 2651                   */
2656 2652                  MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2657 2653                  return (NULL);
2658 2654          }
2659 2655  
2660 2656          vh = ph->ph_vhci;
2661 2657          if (vh == NULL) {
2662 2658                  /*
2663 2659                   * Invalid vHCI device, Nothing more to do.
2664 2660                   */
2665 2661                  MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2666 2662                  return (NULL);
2667 2663          }
2668 2664  
2669 2665          /*
2670 2666           * Look for pathinfo node identified by paddr.
2671 2667           */
2672 2668          if (caddr == NULL) {
2673 2669                  /*
2674 2670                   * Find a mdi_pathinfo node under pHCI list for a matching
2675 2671                   * unit address.
2676 2672                   */
2677 2673                  MDI_PHCI_LOCK(ph);
2678 2674                  if (MDI_PHCI_IS_OFFLINE(ph)) {
2679 2675                          MDI_DEBUG(2, (MDI_WARN, pdip,
2680 2676                              "offline phci %p", (void *)ph));
2681 2677                          MDI_PHCI_UNLOCK(ph);
2682 2678                          return (NULL);
2683 2679                  }
2684 2680                  pip = (mdi_pathinfo_t *)ph->ph_path_head;
2685 2681  
2686 2682                  while (pip != NULL) {
2687 2683                          if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2688 2684                                  break;
2689 2685                          }
2690 2686                          pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2691 2687                  }
2692 2688                  MDI_PHCI_UNLOCK(ph);
2693 2689                  MDI_DEBUG(2, (MDI_NOTE, pdip,
2694 2690                      "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2695 2691                  return (pip);
2696 2692          }
2697 2693  
2698 2694          /*
2699 2695           * XXX - Is the rest of the code in this function really necessary?
2700 2696           * The consumers of mdi_pi_find() can search for the desired pathinfo
2701 2697           * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2702 2698           * whether the search is based on the pathinfo nodes attached to
2703 2699           * the pHCI or the client node, the result will be the same.
2704 2700           */
2705 2701  
2706 2702          /*
2707 2703           * Find the client device corresponding to 'caddr'
2708 2704           */
2709 2705          MDI_VHCI_CLIENT_LOCK(vh);
2710 2706  
2711 2707          /*
2712 2708           * XXX - Passing NULL to the following function works as long as the
2713 2709           * the client addresses (caddr) are unique per vhci basis.
2714 2710           */
2715 2711          ct = i_mdi_client_find(vh, NULL, caddr);
2716 2712          if (ct == NULL) {
2717 2713                  /*
2718 2714                   * Client not found, Obviously mdi_pathinfo node has not been
2719 2715                   * created yet.
2720 2716                   */
2721 2717                  MDI_VHCI_CLIENT_UNLOCK(vh);
2722 2718                  MDI_DEBUG(2, (MDI_NOTE, pdip,
2723 2719                      "client not found for caddr @%s", caddr ? caddr : ""));
2724 2720                  return (NULL);
2725 2721          }
2726 2722  
2727 2723          /*
2728 2724           * Hold the client lock and look for a mdi_pathinfo node with matching
2729 2725           * pHCI and paddr
2730 2726           */
2731 2727          MDI_CLIENT_LOCK(ct);
2732 2728  
2733 2729          /*
2734 2730           * Release the global mutex as it is no more needed. Note: We always
2735 2731           * respect the locking order while acquiring.
2736 2732           */
2737 2733          MDI_VHCI_CLIENT_UNLOCK(vh);
2738 2734  
2739 2735          pip = (mdi_pathinfo_t *)ct->ct_path_head;
2740 2736          while (pip != NULL) {
2741 2737                  /*
2742 2738                   * Compare the unit address
2743 2739                   */
2744 2740                  if ((MDI_PI(pip)->pi_phci == ph) &&
2745 2741                      strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2746 2742                          break;
2747 2743                  }
2748 2744                  pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2749 2745          }
2750 2746          MDI_CLIENT_UNLOCK(ct);
2751 2747          MDI_DEBUG(2, (MDI_NOTE, pdip,
2752 2748              "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2753 2749          return (pip);
2754 2750  }
2755 2751  
2756 2752  /*
2757 2753   * mdi_pi_alloc():
2758 2754   *              Allocate and initialize a new instance of a mdi_pathinfo node.
2759 2755   *              The mdi_pathinfo node returned by this function identifies a
2760 2756   *              unique device path is capable of having properties attached
2761 2757   *              and passed to mdi_pi_online() to fully attach and online the
2762 2758   *              path and client device node.
2763 2759   *              The mdi_pathinfo node returned by this function must be
2764 2760   *              destroyed using mdi_pi_free() if the path is no longer
2765 2761   *              operational or if the caller fails to attach a client device
2766 2762   *              node when calling mdi_pi_online(). The framework will not free
2767 2763   *              the resources allocated.
2768 2764   *              This function can be called from both interrupt and kernel
2769 2765   *              contexts.  DDI_NOSLEEP flag should be used while calling
2770 2766   *              from interrupt contexts.
2771 2767   * Return Values:
2772 2768   *              MDI_SUCCESS
2773 2769   *              MDI_FAILURE
2774 2770   *              MDI_NOMEM
2775 2771   */
2776 2772  /*ARGSUSED*/
2777 2773  int
2778 2774  mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2779 2775      char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2780 2776  {
2781 2777          mdi_vhci_t      *vh;
2782 2778          mdi_phci_t      *ph;
2783 2779          mdi_client_t    *ct;
2784 2780          mdi_pathinfo_t  *pip = NULL;
2785 2781          dev_info_t      *cdip;
2786 2782          int             rv = MDI_NOMEM;
2787 2783          int             path_allocated = 0;
2788 2784  
2789 2785          MDI_DEBUG(2, (MDI_NOTE, pdip,
2790 2786              "cname %s: caddr@%s paddr@%s",
2791 2787              cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2792 2788  
2793 2789          if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2794 2790              ret_pip == NULL) {
2795 2791                  /* Nothing more to do */
2796 2792                  return (MDI_FAILURE);
2797 2793          }
2798 2794  
2799 2795          *ret_pip = NULL;
2800 2796  
2801 2797          /* No allocations on detaching pHCI */
2802 2798          if (DEVI_IS_DETACHING(pdip)) {
2803 2799                  /* Invalid pHCI device, return failure */
2804 2800                  MDI_DEBUG(1, (MDI_WARN, pdip,
2805 2801                      "!detaching pHCI=%p", (void *)pdip));
2806 2802                  return (MDI_FAILURE);
2807 2803          }
2808 2804  
2809 2805          ph = i_devi_get_phci(pdip);
2810 2806          ASSERT(ph != NULL);
2811 2807          if (ph == NULL) {
2812 2808                  /* Invalid pHCI device, return failure */
2813 2809                  MDI_DEBUG(1, (MDI_WARN, pdip,
2814 2810                      "!invalid pHCI=%p", (void *)pdip));
2815 2811                  return (MDI_FAILURE);
2816 2812          }
2817 2813  
2818 2814          MDI_PHCI_LOCK(ph);
2819 2815          vh = ph->ph_vhci;
2820 2816          if (vh == NULL) {
2821 2817                  /* Invalid vHCI device, return failure */
2822 2818                  MDI_DEBUG(1, (MDI_WARN, pdip,
2823 2819                      "!invalid vHCI=%p", (void *)pdip));
2824 2820                  MDI_PHCI_UNLOCK(ph);
2825 2821                  return (MDI_FAILURE);
2826 2822          }
2827 2823  
2828 2824          if (MDI_PHCI_IS_READY(ph) == 0) {
2829 2825                  /*
2830 2826                   * Do not allow new node creation when pHCI is in
2831 2827                   * offline/suspended states
2832 2828                   */
2833 2829                  MDI_DEBUG(1, (MDI_WARN, pdip,
2834 2830                      "pHCI=%p is not ready", (void *)ph));
2835 2831                  MDI_PHCI_UNLOCK(ph);
2836 2832                  return (MDI_BUSY);
2837 2833          }
2838 2834          MDI_PHCI_UNSTABLE(ph);
2839 2835          MDI_PHCI_UNLOCK(ph);
2840 2836  
2841 2837          /* look for a matching client, create one if not found */
2842 2838          MDI_VHCI_CLIENT_LOCK(vh);
2843 2839          ct = i_mdi_client_find(vh, cname, caddr);
2844 2840          if (ct == NULL) {
2845 2841                  ct = i_mdi_client_alloc(vh, cname, caddr);
2846 2842                  ASSERT(ct != NULL);
2847 2843          }
2848 2844  
2849 2845          if (ct->ct_dip == NULL) {
2850 2846                  /*
2851 2847                   * Allocate a devinfo node
2852 2848                   */
2853 2849                  ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2854 2850                      compatible, ncompatible);
2855 2851                  if (ct->ct_dip == NULL) {
2856 2852                          (void) i_mdi_client_free(vh, ct);
2857 2853                          goto fail;
2858 2854                  }
2859 2855          }
2860 2856          cdip = ct->ct_dip;
2861 2857  
2862 2858          DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2863 2859          DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2864 2860  
2865 2861          MDI_CLIENT_LOCK(ct);
2866 2862          pip = (mdi_pathinfo_t *)ct->ct_path_head;
2867 2863          while (pip != NULL) {
2868 2864                  /*
2869 2865                   * Compare the unit address
2870 2866                   */
2871 2867                  if ((MDI_PI(pip)->pi_phci == ph) &&
2872 2868                      strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2873 2869                          break;
2874 2870                  }
2875 2871                  pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2876 2872          }
2877 2873          MDI_CLIENT_UNLOCK(ct);
2878 2874  
2879 2875          if (pip == NULL) {
2880 2876                  /*
2881 2877                   * This is a new path for this client device.  Allocate and
2882 2878                   * initialize a new pathinfo node
2883 2879                   */
2884 2880                  pip = i_mdi_pi_alloc(ph, paddr, ct);
2885 2881                  ASSERT(pip != NULL);
2886 2882                  path_allocated = 1;
2887 2883          }
2888 2884          rv = MDI_SUCCESS;
2889 2885  
2890 2886  fail:
2891 2887          /*
2892 2888           * Release the global mutex.
2893 2889           */
2894 2890          MDI_VHCI_CLIENT_UNLOCK(vh);
2895 2891  
2896 2892          /*
2897 2893           * Mark the pHCI as stable
2898 2894           */
2899 2895          MDI_PHCI_LOCK(ph);
2900 2896          MDI_PHCI_STABLE(ph);
2901 2897          MDI_PHCI_UNLOCK(ph);
2902 2898          *ret_pip = pip;
2903 2899  
2904 2900          MDI_DEBUG(2, (MDI_NOTE, pdip,
2905 2901              "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2906 2902  
2907 2903          if (path_allocated)
2908 2904                  vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2909 2905  
2910 2906          return (rv);
2911 2907  }
2912 2908  
2913 2909  /*ARGSUSED*/
2914 2910  int
2915 2911  mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2916 2912      int flags, mdi_pathinfo_t **ret_pip)
2917 2913  {
2918 2914          return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2919 2915              flags, ret_pip));
2920 2916  }
2921 2917  
2922 2918  /*
2923 2919   * i_mdi_pi_alloc():
2924 2920   *              Allocate a mdi_pathinfo node and add to the pHCI path list
2925 2921   * Return Values:
2926 2922   *              mdi_pathinfo
2927 2923   */
2928 2924  /*ARGSUSED*/
2929 2925  static mdi_pathinfo_t *
2930 2926  i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2931 2927  {
2932 2928          mdi_pathinfo_t  *pip;
2933 2929          int             ct_circular;
2934 2930          int             ph_circular;
2935 2931          static char     path[MAXPATHLEN];       /* mdi_pathmap_mutex protects */
2936 2932          char            *path_persistent;
2937 2933          int             path_instance;
2938 2934          mod_hash_val_t  hv;
2939 2935  
2940 2936          ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2941 2937  
2942 2938          pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2943 2939          mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2944 2940          MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2945 2941              MDI_PATHINFO_STATE_TRANSIENT;
2946 2942  
2947 2943          if (MDI_PHCI_IS_USER_DISABLED(ph))
2948 2944                  MDI_PI_SET_USER_DISABLE(pip);
2949 2945  
2950 2946          if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2951 2947                  MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2952 2948  
2953 2949          if (MDI_PHCI_IS_DRV_DISABLED(ph))
2954 2950                  MDI_PI_SET_DRV_DISABLE(pip);
2955 2951  
2956 2952          MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2957 2953          cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2958 2954          MDI_PI(pip)->pi_client = ct;
2959 2955          MDI_PI(pip)->pi_phci = ph;
2960 2956          MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2961 2957          (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2962 2958  
2963 2959          /*
2964 2960           * We form the "path" to the pathinfo node, and see if we have
2965 2961           * already allocated a 'path_instance' for that "path".  If so,
2966 2962           * we use the already allocated 'path_instance'.  If not, we
2967 2963           * allocate a new 'path_instance' and associate it with a copy of
2968 2964           * the "path" string (which is never freed). The association
2969 2965           * between a 'path_instance' this "path" string persists until
2970 2966           * reboot.
2971 2967           */
2972 2968          mutex_enter(&mdi_pathmap_mutex);
2973 2969          (void) ddi_pathname(ph->ph_dip, path);
2974 2970          (void) sprintf(path + strlen(path), "/%s@%s",
2975 2971              mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2976 2972          if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2977 2973                  path_instance = (uint_t)(intptr_t)hv;
2978 2974          } else {
2979 2975                  /* allocate a new 'path_instance' and persistent "path" */
2980 2976                  path_instance = mdi_pathmap_instance++;
2981 2977                  path_persistent = i_ddi_strdup(path, KM_SLEEP);
2982 2978                  (void) mod_hash_insert(mdi_pathmap_bypath,
2983 2979                      (mod_hash_key_t)path_persistent,
2984 2980                      (mod_hash_val_t)(intptr_t)path_instance);
2985 2981                  (void) mod_hash_insert(mdi_pathmap_byinstance,
2986 2982                      (mod_hash_key_t)(intptr_t)path_instance,
2987 2983                      (mod_hash_val_t)path_persistent);
2988 2984  
2989 2985                  /* create shortpath name */
2990 2986                  (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2991 2987                      ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2992 2988                      mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2993 2989                  path_persistent = i_ddi_strdup(path, KM_SLEEP);
2994 2990                  (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2995 2991                      (mod_hash_key_t)(intptr_t)path_instance,
2996 2992                      (mod_hash_val_t)path_persistent);
2997 2993          }
2998 2994          mutex_exit(&mdi_pathmap_mutex);
2999 2995          MDI_PI(pip)->pi_path_instance = path_instance;
3000 2996  
3001 2997          (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3002 2998          ASSERT(MDI_PI(pip)->pi_prop != NULL);
3003 2999          MDI_PI(pip)->pi_pprivate = NULL;
3004 3000          MDI_PI(pip)->pi_cprivate = NULL;
3005 3001          MDI_PI(pip)->pi_vprivate = NULL;
3006 3002          MDI_PI(pip)->pi_client_link = NULL;
3007 3003          MDI_PI(pip)->pi_phci_link = NULL;
3008 3004          MDI_PI(pip)->pi_ref_cnt = 0;
3009 3005          MDI_PI(pip)->pi_kstats = NULL;
3010 3006          MDI_PI(pip)->pi_preferred = 1;
3011 3007          cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3012 3008  
3013 3009          /*
3014 3010           * Lock both dev_info nodes against changes in parallel.
3015 3011           *
3016 3012           * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3017 3013           * This atypical operation is done to synchronize pathinfo nodes
3018 3014           * during devinfo snapshot (see di_register_pip) by 'pretending' that
3019 3015           * the pathinfo nodes are children of the Client.
3020 3016           */
3021 3017          ndi_devi_enter(ct->ct_dip, &ct_circular);
3022 3018          ndi_devi_enter(ph->ph_dip, &ph_circular);
3023 3019  
3024 3020          i_mdi_phci_add_path(ph, pip);
3025 3021          i_mdi_client_add_path(ct, pip);
3026 3022  
3027 3023          ndi_devi_exit(ph->ph_dip, ph_circular);
3028 3024          ndi_devi_exit(ct->ct_dip, ct_circular);
3029 3025  
3030 3026          return (pip);
3031 3027  }
3032 3028  
3033 3029  /*
3034 3030   * mdi_pi_pathname_by_instance():
3035 3031   *      Lookup of "path" by 'path_instance'. Return "path".
3036 3032   *      NOTE: returned "path" remains valid forever (until reboot).
3037 3033   */
3038 3034  char *
3039 3035  mdi_pi_pathname_by_instance(int path_instance)
3040 3036  {
3041 3037          char            *path;
3042 3038          mod_hash_val_t  hv;
3043 3039  
3044 3040          /* mdi_pathmap lookup of "path" by 'path_instance' */
3045 3041          mutex_enter(&mdi_pathmap_mutex);
3046 3042          if (mod_hash_find(mdi_pathmap_byinstance,
3047 3043              (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3048 3044                  path = (char *)hv;
3049 3045          else
3050 3046                  path = NULL;
3051 3047          mutex_exit(&mdi_pathmap_mutex);
3052 3048          return (path);
3053 3049  }
3054 3050  
3055 3051  /*
3056 3052   * mdi_pi_spathname_by_instance():
3057 3053   *      Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3058 3054   *      NOTE: returned "shortpath" remains valid forever (until reboot).
3059 3055   */
3060 3056  char *
3061 3057  mdi_pi_spathname_by_instance(int path_instance)
3062 3058  {
3063 3059          char            *path;
3064 3060          mod_hash_val_t  hv;
3065 3061  
3066 3062          /* mdi_pathmap lookup of "path" by 'path_instance' */
3067 3063          mutex_enter(&mdi_pathmap_mutex);
3068 3064          if (mod_hash_find(mdi_pathmap_sbyinstance,
3069 3065              (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3070 3066                  path = (char *)hv;
3071 3067          else
3072 3068                  path = NULL;
3073 3069          mutex_exit(&mdi_pathmap_mutex);
3074 3070          return (path);
3075 3071  }
3076 3072  
3077 3073  
3078 3074  /*
3079 3075   * i_mdi_phci_add_path():
3080 3076   *              Add a mdi_pathinfo node to pHCI list.
3081 3077   * Notes:
3082 3078   *              Caller should per-pHCI mutex
3083 3079   */
3084 3080  static void
3085 3081  i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3086 3082  {
3087 3083          ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3088 3084  
3089 3085          MDI_PHCI_LOCK(ph);
3090 3086          if (ph->ph_path_head == NULL) {
3091 3087                  ph->ph_path_head = pip;
3092 3088          } else {
3093 3089                  MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3094 3090          }
3095 3091          ph->ph_path_tail = pip;
3096 3092          ph->ph_path_count++;
3097 3093          MDI_PHCI_UNLOCK(ph);
3098 3094  }
3099 3095  
3100 3096  /*
3101 3097   * i_mdi_client_add_path():
3102 3098   *              Add mdi_pathinfo node to client list
3103 3099   */
3104 3100  static void
3105 3101  i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3106 3102  {
3107 3103          ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3108 3104  
3109 3105          MDI_CLIENT_LOCK(ct);
3110 3106          if (ct->ct_path_head == NULL) {
3111 3107                  ct->ct_path_head = pip;
3112 3108          } else {
3113 3109                  MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3114 3110          }
3115 3111          ct->ct_path_tail = pip;
3116 3112          ct->ct_path_count++;
3117 3113          MDI_CLIENT_UNLOCK(ct);
3118 3114  }
3119 3115  
3120 3116  /*
3121 3117   * mdi_pi_free():
3122 3118   *              Free the mdi_pathinfo node and also client device node if this
3123 3119   *              is the last path to the device
3124 3120   * Return Values:
3125 3121   *              MDI_SUCCESS
3126 3122   *              MDI_FAILURE
3127 3123   *              MDI_BUSY
3128 3124   */
3129 3125  /*ARGSUSED*/
3130 3126  int
3131 3127  mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3132 3128  {
3133 3129          int             rv;
3134 3130          mdi_vhci_t      *vh;
3135 3131          mdi_phci_t      *ph;
3136 3132          mdi_client_t    *ct;
3137 3133          int             (*f)();
3138 3134          int             client_held = 0;
3139 3135  
3140 3136          MDI_PI_LOCK(pip);
3141 3137          ph = MDI_PI(pip)->pi_phci;
3142 3138          ASSERT(ph != NULL);
3143 3139          if (ph == NULL) {
3144 3140                  /*
3145 3141                   * Invalid pHCI device, return failure
3146 3142                   */
3147 3143                  MDI_DEBUG(1, (MDI_WARN, NULL,
3148 3144                      "!invalid pHCI: pip %s %p",
3149 3145                      mdi_pi_spathname(pip), (void *)pip));
3150 3146                  MDI_PI_UNLOCK(pip);
3151 3147                  return (MDI_FAILURE);
3152 3148          }
3153 3149  
3154 3150          vh = ph->ph_vhci;
3155 3151          ASSERT(vh != NULL);
3156 3152          if (vh == NULL) {
3157 3153                  /* Invalid pHCI device, return failure */
3158 3154                  MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3159 3155                      "!invalid vHCI: pip %s %p",
3160 3156                      mdi_pi_spathname(pip), (void *)pip));
3161 3157                  MDI_PI_UNLOCK(pip);
3162 3158                  return (MDI_FAILURE);
3163 3159          }
3164 3160  
3165 3161          ct = MDI_PI(pip)->pi_client;
3166 3162          ASSERT(ct != NULL);
3167 3163          if (ct == NULL) {
3168 3164                  /*
3169 3165                   * Invalid Client device, return failure
3170 3166                   */
3171 3167                  MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3172 3168                      "!invalid client: pip %s %p",
3173 3169                      mdi_pi_spathname(pip), (void *)pip));
3174 3170                  MDI_PI_UNLOCK(pip);
3175 3171                  return (MDI_FAILURE);
3176 3172          }
3177 3173  
3178 3174          /*
3179 3175           * Check to see for busy condition.  A mdi_pathinfo can only be freed
3180 3176           * if the node state is either offline or init and the reference count
3181 3177           * is zero.
3182 3178           */
3183 3179          if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3184 3180              MDI_PI_IS_INITING(pip))) {
3185 3181                  /*
3186 3182                   * Node is busy
3187 3183                   */
3188 3184                  MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3189 3185                      "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3190 3186                  MDI_PI_UNLOCK(pip);
3191 3187                  return (MDI_BUSY);
3192 3188          }
3193 3189  
3194 3190          while (MDI_PI(pip)->pi_ref_cnt != 0) {
3195 3191                  /*
3196 3192                   * Give a chance for pending I/Os to complete.
3197 3193                   */
3198 3194                  MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3199 3195                      "!%d cmds still pending on path: %s %p",
3200 3196                      MDI_PI(pip)->pi_ref_cnt,
3201 3197                      mdi_pi_spathname(pip), (void *)pip));
3202 3198                  if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3203 3199                      &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3204 3200                      TR_CLOCK_TICK) == -1) {
3205 3201                          /*
3206 3202                           * The timeout time reached without ref_cnt being zero
3207 3203                           * being signaled.
3208 3204                           */
3209 3205                          MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3210 3206                              "!Timeout reached on path %s %p without the cond",
3211 3207                              mdi_pi_spathname(pip), (void *)pip));
3212 3208                          MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3213 3209                              "!%d cmds still pending on path %s %p",
3214 3210                              MDI_PI(pip)->pi_ref_cnt,
3215 3211                              mdi_pi_spathname(pip), (void *)pip));
3216 3212                          MDI_PI_UNLOCK(pip);
3217 3213                          return (MDI_BUSY);
3218 3214                  }
3219 3215          }
3220 3216          if (MDI_PI(pip)->pi_pm_held) {
3221 3217                  client_held = 1;
3222 3218          }
3223 3219          MDI_PI_UNLOCK(pip);
3224 3220  
3225 3221          vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3226 3222  
3227 3223          MDI_CLIENT_LOCK(ct);
3228 3224  
3229 3225          /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3230 3226          MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3231 3227  
3232 3228          /*
3233 3229           * Wait till failover is complete before removing this node.
3234 3230           */
3235 3231          while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3236 3232                  cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3237 3233  
3238 3234          MDI_CLIENT_UNLOCK(ct);
3239 3235          MDI_VHCI_CLIENT_LOCK(vh);
3240 3236          MDI_CLIENT_LOCK(ct);
3241 3237          MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3242 3238  
3243 3239          if (!MDI_PI_IS_INITING(pip)) {
3244 3240                  f = vh->vh_ops->vo_pi_uninit;
3245 3241                  if (f != NULL) {
3246 3242                          rv = (*f)(vh->vh_dip, pip, 0);
3247 3243                  }
3248 3244          } else
3249 3245                  rv = MDI_SUCCESS;
3250 3246  
3251 3247          /*
3252 3248           * If vo_pi_uninit() completed successfully.
3253 3249           */
3254 3250          if (rv == MDI_SUCCESS) {
3255 3251                  if (client_held) {
  
    | 
      ↓ open down ↓ | 
    2046 lines elided | 
    
      ↑ open up ↑ | 
  
3256 3252                          MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3257 3253                              "i_mdi_pm_rele_client\n"));
3258 3254                          i_mdi_pm_rele_client(ct, 1);
3259 3255                  }
3260 3256                  i_mdi_pi_free(ph, pip, ct);
3261 3257                  if (ct->ct_path_count == 0) {
3262 3258                          /*
3263 3259                           * Client lost its last path.
3264 3260                           * Clean up the client device
3265 3261                           */
     3262 +                        ct->ct_flags |= flags;
3266 3263                          MDI_CLIENT_UNLOCK(ct);
3267 3264                          (void) i_mdi_client_free(ct->ct_vhci, ct);
3268 3265                          MDI_VHCI_CLIENT_UNLOCK(vh);
3269 3266                          return (rv);
3270 3267                  }
3271 3268          }
3272 3269          MDI_CLIENT_UNLOCK(ct);
3273 3270          MDI_VHCI_CLIENT_UNLOCK(vh);
3274 3271  
3275 3272          if (rv == MDI_FAILURE)
3276 3273                  vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3277 3274  
3278 3275          return (rv);
3279 3276  }
3280 3277  
3281 3278  /*
3282 3279   * i_mdi_pi_free():
3283 3280   *              Free the mdi_pathinfo node
3284 3281   */
3285 3282  static void
3286 3283  i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3287 3284  {
3288 3285          int     ct_circular;
3289 3286          int     ph_circular;
3290 3287  
3291 3288          ASSERT(MDI_CLIENT_LOCKED(ct));
3292 3289  
3293 3290          /*
3294 3291           * remove any per-path kstats
3295 3292           */
3296 3293          i_mdi_pi_kstat_destroy(pip);
3297 3294  
3298 3295          /* See comments in i_mdi_pi_alloc() */
3299 3296          ndi_devi_enter(ct->ct_dip, &ct_circular);
3300 3297          ndi_devi_enter(ph->ph_dip, &ph_circular);
3301 3298  
3302 3299          i_mdi_client_remove_path(ct, pip);
3303 3300          i_mdi_phci_remove_path(ph, pip);
3304 3301  
3305 3302          ndi_devi_exit(ph->ph_dip, ph_circular);
3306 3303          ndi_devi_exit(ct->ct_dip, ct_circular);
3307 3304  
3308 3305          mutex_destroy(&MDI_PI(pip)->pi_mutex);
3309 3306          cv_destroy(&MDI_PI(pip)->pi_state_cv);
3310 3307          cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3311 3308          if (MDI_PI(pip)->pi_addr) {
3312 3309                  kmem_free(MDI_PI(pip)->pi_addr,
3313 3310                      strlen(MDI_PI(pip)->pi_addr) + 1);
3314 3311                  MDI_PI(pip)->pi_addr = NULL;
3315 3312          }
3316 3313  
3317 3314          if (MDI_PI(pip)->pi_prop) {
3318 3315                  (void) nvlist_free(MDI_PI(pip)->pi_prop);
3319 3316                  MDI_PI(pip)->pi_prop = NULL;
3320 3317          }
3321 3318          kmem_free(pip, sizeof (struct mdi_pathinfo));
3322 3319  }
3323 3320  
3324 3321  
3325 3322  /*
3326 3323   * i_mdi_phci_remove_path():
3327 3324   *              Remove a mdi_pathinfo node from pHCI list.
3328 3325   * Notes:
3329 3326   *              Caller should hold per-pHCI mutex
3330 3327   */
3331 3328  static void
3332 3329  i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3333 3330  {
3334 3331          mdi_pathinfo_t  *prev = NULL;
3335 3332          mdi_pathinfo_t  *path = NULL;
3336 3333  
3337 3334          ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3338 3335  
3339 3336          MDI_PHCI_LOCK(ph);
3340 3337          path = ph->ph_path_head;
3341 3338          while (path != NULL) {
3342 3339                  if (path == pip) {
3343 3340                          break;
3344 3341                  }
3345 3342                  prev = path;
3346 3343                  path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3347 3344          }
3348 3345  
3349 3346          if (path) {
3350 3347                  ph->ph_path_count--;
3351 3348                  if (prev) {
3352 3349                          MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3353 3350                  } else {
3354 3351                          ph->ph_path_head =
3355 3352                              (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3356 3353                  }
3357 3354                  if (ph->ph_path_tail == path) {
3358 3355                          ph->ph_path_tail = prev;
3359 3356                  }
3360 3357          }
3361 3358  
3362 3359          /*
3363 3360           * Clear the pHCI link
3364 3361           */
3365 3362          MDI_PI(pip)->pi_phci_link = NULL;
3366 3363          MDI_PI(pip)->pi_phci = NULL;
3367 3364          MDI_PHCI_UNLOCK(ph);
3368 3365  }
3369 3366  
3370 3367  /*
3371 3368   * i_mdi_client_remove_path():
3372 3369   *              Remove a mdi_pathinfo node from client path list.
3373 3370   */
3374 3371  static void
3375 3372  i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3376 3373  {
3377 3374          mdi_pathinfo_t  *prev = NULL;
3378 3375          mdi_pathinfo_t  *path;
3379 3376  
3380 3377          ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3381 3378  
3382 3379          ASSERT(MDI_CLIENT_LOCKED(ct));
3383 3380          path = ct->ct_path_head;
3384 3381          while (path != NULL) {
3385 3382                  if (path == pip) {
3386 3383                          break;
3387 3384                  }
3388 3385                  prev = path;
3389 3386                  path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3390 3387          }
3391 3388  
3392 3389          if (path) {
3393 3390                  ct->ct_path_count--;
3394 3391                  if (prev) {
3395 3392                          MDI_PI(prev)->pi_client_link =
3396 3393                              MDI_PI(path)->pi_client_link;
3397 3394                  } else {
3398 3395                          ct->ct_path_head =
3399 3396                              (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3400 3397                  }
3401 3398                  if (ct->ct_path_tail == path) {
3402 3399                          ct->ct_path_tail = prev;
3403 3400                  }
3404 3401                  if (ct->ct_path_last == path) {
3405 3402                          ct->ct_path_last = ct->ct_path_head;
3406 3403                  }
3407 3404          }
3408 3405          MDI_PI(pip)->pi_client_link = NULL;
3409 3406          MDI_PI(pip)->pi_client = NULL;
3410 3407  }
3411 3408  
3412 3409  /*
3413 3410   * i_mdi_pi_state_change():
3414 3411   *              online a mdi_pathinfo node
3415 3412   *
3416 3413   * Return Values:
3417 3414   *              MDI_SUCCESS
3418 3415   *              MDI_FAILURE
3419 3416   */
3420 3417  /*ARGSUSED*/
3421 3418  static int
3422 3419  i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3423 3420  {
3424 3421          int             rv = MDI_SUCCESS;
3425 3422          mdi_vhci_t      *vh;
3426 3423          mdi_phci_t      *ph;
3427 3424          mdi_client_t    *ct;
3428 3425          int             (*f)();
3429 3426          dev_info_t      *cdip;
3430 3427  
3431 3428          MDI_PI_LOCK(pip);
3432 3429  
3433 3430          ph = MDI_PI(pip)->pi_phci;
3434 3431          ASSERT(ph);
3435 3432          if (ph == NULL) {
3436 3433                  /*
3437 3434                   * Invalid pHCI device, fail the request
3438 3435                   */
3439 3436                  MDI_PI_UNLOCK(pip);
3440 3437                  MDI_DEBUG(1, (MDI_WARN, NULL,
3441 3438                      "!invalid phci: pip %s %p",
3442 3439                      mdi_pi_spathname(pip), (void *)pip));
3443 3440                  return (MDI_FAILURE);
3444 3441          }
3445 3442  
3446 3443          vh = ph->ph_vhci;
3447 3444          ASSERT(vh);
3448 3445          if (vh == NULL) {
3449 3446                  /*
3450 3447                   * Invalid vHCI device, fail the request
3451 3448                   */
3452 3449                  MDI_PI_UNLOCK(pip);
3453 3450                  MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3454 3451                      "!invalid vhci: pip %s %p",
3455 3452                      mdi_pi_spathname(pip), (void *)pip));
3456 3453                  return (MDI_FAILURE);
3457 3454          }
3458 3455  
3459 3456          ct = MDI_PI(pip)->pi_client;
3460 3457          ASSERT(ct != NULL);
3461 3458          if (ct == NULL) {
3462 3459                  /*
3463 3460                   * Invalid client device, fail the request
3464 3461                   */
3465 3462                  MDI_PI_UNLOCK(pip);
3466 3463                  MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3467 3464                      "!invalid client: pip %s %p",
3468 3465                      mdi_pi_spathname(pip), (void *)pip));
3469 3466                  return (MDI_FAILURE);
3470 3467          }
3471 3468  
3472 3469          /*
3473 3470           * If this path has not been initialized yet, call the vHCI driver's
3474 3471           * pathinfo node initialization entry point.
3475 3472           */
3476 3473  
3477 3474          if (MDI_PI_IS_INITING(pip)) {
3478 3475                  MDI_PI_UNLOCK(pip);
3479 3476                  f = vh->vh_ops->vo_pi_init;
3480 3477                  if (f != NULL) {
3481 3478                          rv = (*f)(vh->vh_dip, pip, 0);
3482 3479                          if (rv != MDI_SUCCESS) {
3483 3480                                  MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3484 3481                                      "!vo_pi_init failed: vHCI %p, pip %s %p",
3485 3482                                      (void *)vh, mdi_pi_spathname(pip),
3486 3483                                      (void *)pip));
3487 3484                                  return (MDI_FAILURE);
3488 3485                          }
3489 3486                  }
3490 3487                  MDI_PI_LOCK(pip);
3491 3488                  MDI_PI_CLEAR_TRANSIENT(pip);
3492 3489          }
3493 3490  
3494 3491          /*
3495 3492           * Do not allow state transition when pHCI is in offline/suspended
3496 3493           * states
3497 3494           */
3498 3495          i_mdi_phci_lock(ph, pip);
3499 3496          if (MDI_PHCI_IS_READY(ph) == 0) {
3500 3497                  MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3501 3498                      "!pHCI not ready, pHCI=%p", (void *)ph));
3502 3499                  MDI_PI_UNLOCK(pip);
3503 3500                  i_mdi_phci_unlock(ph);
3504 3501                  return (MDI_BUSY);
3505 3502          }
3506 3503          MDI_PHCI_UNSTABLE(ph);
3507 3504          i_mdi_phci_unlock(ph);
3508 3505  
3509 3506          /*
3510 3507           * Check if mdi_pathinfo state is in transient state.
3511 3508           * If yes, offlining is in progress and wait till transient state is
3512 3509           * cleared.
3513 3510           */
3514 3511          if (MDI_PI_IS_TRANSIENT(pip)) {
3515 3512                  while (MDI_PI_IS_TRANSIENT(pip)) {
3516 3513                          cv_wait(&MDI_PI(pip)->pi_state_cv,
3517 3514                              &MDI_PI(pip)->pi_mutex);
3518 3515                  }
3519 3516          }
3520 3517  
3521 3518          /*
3522 3519           * Grab the client lock in reverse order sequence and release the
3523 3520           * mdi_pathinfo mutex.
3524 3521           */
3525 3522          i_mdi_client_lock(ct, pip);
3526 3523          MDI_PI_UNLOCK(pip);
3527 3524  
3528 3525          /*
3529 3526           * Wait till failover state is cleared
3530 3527           */
3531 3528          while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3532 3529                  cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3533 3530  
3534 3531          /*
3535 3532           * Mark the mdi_pathinfo node state as transient
3536 3533           */
3537 3534          MDI_PI_LOCK(pip);
3538 3535          switch (state) {
3539 3536          case MDI_PATHINFO_STATE_ONLINE:
3540 3537                  MDI_PI_SET_ONLINING(pip);
3541 3538                  break;
3542 3539  
3543 3540          case MDI_PATHINFO_STATE_STANDBY:
3544 3541                  MDI_PI_SET_STANDBYING(pip);
3545 3542                  break;
3546 3543  
3547 3544          case MDI_PATHINFO_STATE_FAULT:
3548 3545                  /*
3549 3546                   * Mark the pathinfo state as FAULTED
3550 3547                   */
3551 3548                  MDI_PI_SET_FAULTING(pip);
3552 3549                  MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3553 3550                  break;
3554 3551  
3555 3552          case MDI_PATHINFO_STATE_OFFLINE:
3556 3553                  /*
3557 3554                   * ndi_devi_offline() cannot hold pip or ct locks.
3558 3555                   */
3559 3556                  MDI_PI_UNLOCK(pip);
3560 3557  
3561 3558                  /*
3562 3559                   * If this is a user initiated path online->offline operation
3563 3560                   * whose success would transition a client from DEGRADED to
3564 3561                   * FAILED then only proceed if we can offline the client first.
3565 3562                   */
3566 3563                  cdip = ct->ct_dip;
3567 3564                  if ((flag & NDI_USER_REQ) &&
3568 3565                      MDI_PI_IS_ONLINE(pip) &&
3569 3566                      (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3570 3567                          i_mdi_client_unlock(ct);
3571 3568                          rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3572 3569                          if (rv != NDI_SUCCESS) {
3573 3570                                  /*
3574 3571                                   * Convert to MDI error code
3575 3572                                   */
3576 3573                                  switch (rv) {
3577 3574                                  case NDI_BUSY:
3578 3575                                          rv = MDI_BUSY;
3579 3576                                          break;
3580 3577                                  default:
3581 3578                                          rv = MDI_FAILURE;
3582 3579                                          break;
3583 3580                                  }
3584 3581                                  goto state_change_exit;
3585 3582                          } else {
3586 3583                                  i_mdi_client_lock(ct, NULL);
3587 3584                          }
3588 3585                  }
3589 3586                  /*
3590 3587                   * Mark the mdi_pathinfo node state as transient
3591 3588                   */
3592 3589                  MDI_PI_LOCK(pip);
3593 3590                  MDI_PI_SET_OFFLINING(pip);
3594 3591                  break;
3595 3592          }
3596 3593          MDI_PI_UNLOCK(pip);
3597 3594          MDI_CLIENT_UNSTABLE(ct);
3598 3595          i_mdi_client_unlock(ct);
3599 3596  
3600 3597          f = vh->vh_ops->vo_pi_state_change;
3601 3598          if (f != NULL)
3602 3599                  rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3603 3600  
3604 3601          MDI_CLIENT_LOCK(ct);
3605 3602          MDI_PI_LOCK(pip);
3606 3603          if (rv == MDI_NOT_SUPPORTED) {
3607 3604                  MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3608 3605          }
3609 3606          if (rv != MDI_SUCCESS) {
3610 3607                  MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3611 3608                      "vo_pi_state_change failed: rv %x", rv));
3612 3609          }
3613 3610          if (MDI_PI_IS_TRANSIENT(pip)) {
3614 3611                  if (rv == MDI_SUCCESS) {
3615 3612                          MDI_PI_CLEAR_TRANSIENT(pip);
3616 3613                  } else {
3617 3614                          MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3618 3615                  }
3619 3616          }
3620 3617  
3621 3618          /*
3622 3619           * Wake anyone waiting for this mdi_pathinfo node
3623 3620           */
3624 3621          cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3625 3622          MDI_PI_UNLOCK(pip);
3626 3623  
3627 3624          /*
3628 3625           * Mark the client device as stable
3629 3626           */
3630 3627          MDI_CLIENT_STABLE(ct);
3631 3628          if (rv == MDI_SUCCESS) {
3632 3629                  if (ct->ct_unstable == 0) {
3633 3630                          cdip = ct->ct_dip;
3634 3631  
3635 3632                          /*
3636 3633                           * Changing the mdi_pathinfo node state will impact the
3637 3634                           * client state.  Update the client and dev_info node
3638 3635                           * state accordingly.
3639 3636                           */
3640 3637                          rv = NDI_SUCCESS;
3641 3638                          i_mdi_client_update_state(ct);
3642 3639                          switch (MDI_CLIENT_STATE(ct)) {
3643 3640                          case MDI_CLIENT_STATE_OPTIMAL:
3644 3641                          case MDI_CLIENT_STATE_DEGRADED:
3645 3642                                  if (cdip && !i_ddi_devi_attached(cdip) &&
3646 3643                                      ((state == MDI_PATHINFO_STATE_ONLINE) ||
3647 3644                                      (state == MDI_PATHINFO_STATE_STANDBY))) {
3648 3645  
3649 3646                                          /*
3650 3647                                           * Must do ndi_devi_online() through
3651 3648                                           * hotplug thread for deferred
3652 3649                                           * attach mechanism to work
3653 3650                                           */
3654 3651                                          MDI_CLIENT_UNLOCK(ct);
3655 3652                                          rv = ndi_devi_online(cdip, 0);
3656 3653                                          MDI_CLIENT_LOCK(ct);
3657 3654                                          if ((rv != NDI_SUCCESS) &&
3658 3655                                              (MDI_CLIENT_STATE(ct) ==
3659 3656                                              MDI_CLIENT_STATE_DEGRADED)) {
3660 3657                                                  MDI_DEBUG(1, (MDI_WARN, cdip,
3661 3658                                                      "!ndi_devi_online failed "
3662 3659                                                      "error %x", rv));
  
    | 
      ↓ open down ↓ | 
    387 lines elided | 
    
      ↑ open up ↑ | 
  
3663 3660                                          }
3664 3661                                          rv = NDI_SUCCESS;
3665 3662                                  }
3666 3663                                  break;
3667 3664  
3668 3665                          case MDI_CLIENT_STATE_FAILED:
3669 3666                                  /*
3670 3667                                   * This is the last path case for
3671 3668                                   * non-user initiated events.
3672 3669                                   */
3673      -                                if (((flag & NDI_USER_REQ) == 0) &&
3674      -                                    cdip && (i_ddi_node_state(cdip) >=
3675      -                                    DS_INITIALIZED)) {
3676      -                                        MDI_CLIENT_UNLOCK(ct);
3677      -                                        rv = ndi_devi_offline(cdip,
3678      -                                            NDI_DEVFS_CLEAN);
3679      -                                        MDI_CLIENT_LOCK(ct);
     3670 +                                if ((flag & NDI_USER_REQ) ||
     3671 +                                    cdip == NULL || i_ddi_node_state(cdip) <
     3672 +                                    DS_INITIALIZED)
     3673 +                                        break;
3680 3674  
3681      -                                        if (rv != NDI_SUCCESS) {
3682      -                                                /*
3683      -                                                 * ndi_devi_offline failed.
3684      -                                                 * Reset client flags to
3685      -                                                 * online as the path could not
3686      -                                                 * be offlined.
3687      -                                                 */
3688      -                                                MDI_DEBUG(1, (MDI_WARN, cdip,
3689      -                                                    "!ndi_devi_offline failed: "
3690      -                                                    "error %x", rv));
3691      -                                                MDI_CLIENT_SET_ONLINE(ct);
3692      -                                        }
     3675 +                                MDI_CLIENT_UNLOCK(ct);
     3676 +                                rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN |
     3677 +                                    NDI_DEVI_GONE);
     3678 +                                MDI_CLIENT_LOCK(ct);
     3679 +
     3680 +                                if (rv != NDI_SUCCESS) {
     3681 +                                        /*
     3682 +                                         * Reset client flags to online as the
     3683 +                                         * path could not be offlined.
     3684 +                                         */
     3685 +                                        MDI_DEBUG(1, (MDI_WARN, cdip,
     3686 +                                            "!ndi_devi_offline failed: %d",
     3687 +                                            rv));
     3688 +                                        MDI_CLIENT_SET_ONLINE(ct);
3693 3689                                  }
3694 3690                                  break;
3695 3691                          }
3696 3692                          /*
3697 3693                           * Convert to MDI error code
3698 3694                           */
3699 3695                          switch (rv) {
3700 3696                          case NDI_SUCCESS:
3701 3697                                  MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3702 3698                                  i_mdi_report_path_state(ct, pip);
3703 3699                                  rv = MDI_SUCCESS;
3704 3700                                  break;
3705 3701                          case NDI_BUSY:
3706 3702                                  rv = MDI_BUSY;
3707 3703                                  break;
3708 3704                          default:
3709 3705                                  rv = MDI_FAILURE;
3710 3706                                  break;
3711 3707                          }
3712 3708                  }
3713 3709          }
3714 3710          MDI_CLIENT_UNLOCK(ct);
3715 3711  
3716 3712  state_change_exit:
3717 3713          /*
3718 3714           * Mark the pHCI as stable again.
3719 3715           */
3720 3716          MDI_PHCI_LOCK(ph);
3721 3717          MDI_PHCI_STABLE(ph);
3722 3718          MDI_PHCI_UNLOCK(ph);
3723 3719          return (rv);
3724 3720  }
3725 3721  
3726 3722  /*
3727 3723   * mdi_pi_online():
3728 3724   *              Place the path_info node in the online state.  The path is
3729 3725   *              now available to be selected by mdi_select_path() for
3730 3726   *              transporting I/O requests to client devices.
3731 3727   * Return Values:
3732 3728   *              MDI_SUCCESS
3733 3729   *              MDI_FAILURE
3734 3730   */
3735 3731  int
3736 3732  mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3737 3733  {
3738 3734          mdi_client_t    *ct = MDI_PI(pip)->pi_client;
3739 3735          int             client_held = 0;
3740 3736          int             rv;
3741 3737  
3742 3738          ASSERT(ct != NULL);
3743 3739          rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3744 3740          if (rv != MDI_SUCCESS)
3745 3741                  return (rv);
3746 3742  
3747 3743          MDI_PI_LOCK(pip);
3748 3744          if (MDI_PI(pip)->pi_pm_held == 0) {
3749 3745                  MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3750 3746                      "i_mdi_pm_hold_pip %p", (void *)pip));
3751 3747                  i_mdi_pm_hold_pip(pip);
3752 3748                  client_held = 1;
3753 3749          }
3754 3750          MDI_PI_UNLOCK(pip);
3755 3751  
3756 3752          if (client_held) {
3757 3753                  MDI_CLIENT_LOCK(ct);
3758 3754                  if (ct->ct_power_cnt == 0) {
3759 3755                          rv = i_mdi_power_all_phci(ct);
3760 3756                  }
3761 3757  
3762 3758                  MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3763 3759                      "i_mdi_pm_hold_client %p", (void *)ct));
3764 3760                  i_mdi_pm_hold_client(ct, 1);
3765 3761                  MDI_CLIENT_UNLOCK(ct);
3766 3762          }
3767 3763  
3768 3764          return (rv);
3769 3765  }
3770 3766  
3771 3767  /*
3772 3768   * mdi_pi_standby():
3773 3769   *              Place the mdi_pathinfo node in standby state
3774 3770   *
3775 3771   * Return Values:
3776 3772   *              MDI_SUCCESS
3777 3773   *              MDI_FAILURE
3778 3774   */
3779 3775  int
3780 3776  mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3781 3777  {
3782 3778          return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3783 3779  }
3784 3780  
3785 3781  /*
3786 3782   * mdi_pi_fault():
3787 3783   *              Place the mdi_pathinfo node in fault'ed state
3788 3784   * Return Values:
3789 3785   *              MDI_SUCCESS
3790 3786   *              MDI_FAILURE
3791 3787   */
3792 3788  int
3793 3789  mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3794 3790  {
3795 3791          return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3796 3792  }
3797 3793  
3798 3794  /*
3799 3795   * mdi_pi_offline():
3800 3796   *              Offline a mdi_pathinfo node.
3801 3797   * Return Values:
3802 3798   *              MDI_SUCCESS
3803 3799   *              MDI_FAILURE
3804 3800   */
3805 3801  int
3806 3802  mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3807 3803  {
3808 3804          int     ret, client_held = 0;
3809 3805          mdi_client_t    *ct;
3810 3806  
3811 3807          /*
3812 3808           * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3813 3809           * used it to mean "user initiated operation" (i.e. devctl). Callers
3814 3810           * should now just use NDI_USER_REQ.
3815 3811           */
3816 3812          if (flags & NDI_DEVI_REMOVE) {
3817 3813                  flags &= ~NDI_DEVI_REMOVE;
3818 3814                  flags |= NDI_USER_REQ;
3819 3815          }
3820 3816  
3821 3817          ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3822 3818  
3823 3819          if (ret == MDI_SUCCESS) {
3824 3820                  MDI_PI_LOCK(pip);
3825 3821                  if (MDI_PI(pip)->pi_pm_held) {
3826 3822                          client_held = 1;
3827 3823                  }
3828 3824                  MDI_PI_UNLOCK(pip);
3829 3825  
3830 3826                  if (client_held) {
3831 3827                          ct = MDI_PI(pip)->pi_client;
3832 3828                          MDI_CLIENT_LOCK(ct);
3833 3829                          MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3834 3830                              "i_mdi_pm_rele_client\n"));
3835 3831                          i_mdi_pm_rele_client(ct, 1);
3836 3832                          MDI_CLIENT_UNLOCK(ct);
3837 3833                  }
3838 3834          }
3839 3835  
3840 3836          return (ret);
3841 3837  }
3842 3838  
3843 3839  /*
3844 3840   * i_mdi_pi_offline():
3845 3841   *              Offline a mdi_pathinfo node and call the vHCI driver's callback
3846 3842   */
3847 3843  static int
3848 3844  i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3849 3845  {
3850 3846          dev_info_t      *vdip = NULL;
3851 3847          mdi_vhci_t      *vh = NULL;
3852 3848          mdi_client_t    *ct = NULL;
3853 3849          int             (*f)();
3854 3850          int             rv;
3855 3851  
3856 3852          MDI_PI_LOCK(pip);
3857 3853          ct = MDI_PI(pip)->pi_client;
3858 3854          ASSERT(ct != NULL);
3859 3855  
3860 3856          while (MDI_PI(pip)->pi_ref_cnt != 0) {
3861 3857                  /*
3862 3858                   * Give a chance for pending I/Os to complete.
3863 3859                   */
3864 3860                  MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3865 3861                      "!%d cmds still pending on path %s %p",
3866 3862                      MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3867 3863                      (void *)pip));
3868 3864                  if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3869 3865                      &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3870 3866                      TR_CLOCK_TICK) == -1) {
3871 3867                          /*
3872 3868                           * The timeout expired before a zero ref_cnt was
3873 3869                           * signaled.
3874 3870                           */
3875 3871                          MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3876 3872                              "!Timeout reached on path %s %p without the cond",
3877 3873                              mdi_pi_spathname(pip), (void *)pip));
3878 3874                          MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3879 3875                              "!%d cmds still pending on path %s %p",
3880 3876                              MDI_PI(pip)->pi_ref_cnt,
3881 3877                              mdi_pi_spathname(pip), (void *)pip));
3882 3878                  }
3883 3879          }
3884 3880          vh = ct->ct_vhci;
3885 3881          vdip = vh->vh_dip;
3886 3882  
3887 3883          /*
3888 3884           * Notify vHCI that has registered this event
3889 3885           */
3890 3886          ASSERT(vh->vh_ops);
3891 3887          f = vh->vh_ops->vo_pi_state_change;
3892 3888  
3893 3889          if (f != NULL) {
3894 3890                  MDI_PI_UNLOCK(pip);
3895 3891                  if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3896 3892                      flags)) != MDI_SUCCESS) {
3897 3893                          MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3898 3894                              "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3899 3895                              ddi_driver_name(vdip), ddi_get_instance(vdip),
3900 3896                              (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3901 3897                  }
3902 3898                  MDI_PI_LOCK(pip);
3903 3899          }
3904 3900  
3905 3901          /*
3906 3902           * Set the mdi_pathinfo node state and clear the transient condition
3907 3903           */
3908 3904          MDI_PI_SET_OFFLINE(pip);
3909 3905          cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3910 3906          MDI_PI_UNLOCK(pip);
3911 3907  
3912 3908          MDI_CLIENT_LOCK(ct);
3913 3909          if (rv == MDI_SUCCESS) {
3914 3910                  if (ct->ct_unstable == 0) {
3915 3911                          dev_info_t      *cdip = ct->ct_dip;
3916 3912  
3917 3913                          /*
3918 3914                           * Offlining the mdi_pathinfo node will impact the
3919 3915                           * client state.  Update the client and dev_info node
3920 3916                           * state accordingly.
3921 3917                           */
3922 3918                          i_mdi_client_update_state(ct);
3923 3919                          rv = NDI_SUCCESS;
3924 3920                          if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3925 3921                                  if (cdip &&
3926 3922                                      (i_ddi_node_state(cdip) >=
3927 3923                                      DS_INITIALIZED)) {
3928 3924                                          MDI_CLIENT_UNLOCK(ct);
3929 3925                                          rv = ndi_devi_offline(cdip,
3930 3926                                              NDI_DEVFS_CLEAN);
3931 3927                                          MDI_CLIENT_LOCK(ct);
3932 3928                                          if (rv != NDI_SUCCESS) {
3933 3929                                                  /*
3934 3930                                                   * ndi_devi_offline failed.
3935 3931                                                   * Reset client flags to
3936 3932                                                   * online.
3937 3933                                                   */
3938 3934                                                  MDI_DEBUG(4, (MDI_WARN, cdip,
3939 3935                                                      "ndi_devi_offline failed: "
3940 3936                                                      "error %x", rv));
3941 3937                                                  MDI_CLIENT_SET_ONLINE(ct);
3942 3938                                          }
3943 3939                                  }
3944 3940                          }
3945 3941                          /*
3946 3942                           * Convert to MDI error code
3947 3943                           */
3948 3944                          switch (rv) {
3949 3945                          case NDI_SUCCESS:
3950 3946                                  rv = MDI_SUCCESS;
3951 3947                                  break;
3952 3948                          case NDI_BUSY:
3953 3949                                  rv = MDI_BUSY;
3954 3950                                  break;
3955 3951                          default:
3956 3952                                  rv = MDI_FAILURE;
3957 3953                                  break;
3958 3954                          }
3959 3955                  }
3960 3956                  MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3961 3957                  i_mdi_report_path_state(ct, pip);
3962 3958          }
3963 3959  
3964 3960          MDI_CLIENT_UNLOCK(ct);
3965 3961  
3966 3962          /*
3967 3963           * Change in the mdi_pathinfo node state will impact the client state
3968 3964           */
3969 3965          MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3970 3966              "ct = %p pip = %p", (void *)ct, (void *)pip));
3971 3967          return (rv);
3972 3968  }
3973 3969  
3974 3970  /*
3975 3971   * i_mdi_pi_online():
3976 3972   *              Online a mdi_pathinfo node and call the vHCI driver's callback
3977 3973   */
3978 3974  static int
3979 3975  i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3980 3976  {
3981 3977          mdi_vhci_t      *vh = NULL;
3982 3978          mdi_client_t    *ct = NULL;
3983 3979          mdi_phci_t      *ph;
3984 3980          int             (*f)();
3985 3981          int             rv;
3986 3982  
3987 3983          MDI_PI_LOCK(pip);
3988 3984          ph = MDI_PI(pip)->pi_phci;
3989 3985          vh = ph->ph_vhci;
3990 3986          ct = MDI_PI(pip)->pi_client;
3991 3987          MDI_PI_SET_ONLINING(pip)
3992 3988          MDI_PI_UNLOCK(pip);
3993 3989          f = vh->vh_ops->vo_pi_state_change;
3994 3990          if (f != NULL)
3995 3991                  rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
3996 3992                      flags);
3997 3993          MDI_CLIENT_LOCK(ct);
3998 3994          MDI_PI_LOCK(pip);
3999 3995          cv_broadcast(&MDI_PI(pip)->pi_state_cv);
4000 3996          MDI_PI_UNLOCK(pip);
4001 3997          if (rv == MDI_SUCCESS) {
4002 3998                  dev_info_t      *cdip = ct->ct_dip;
4003 3999  
4004 4000                  rv = MDI_SUCCESS;
4005 4001                  i_mdi_client_update_state(ct);
4006 4002                  if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4007 4003                      MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4008 4004                          if (cdip && !i_ddi_devi_attached(cdip)) {
4009 4005                                  MDI_CLIENT_UNLOCK(ct);
4010 4006                                  rv = ndi_devi_online(cdip, 0);
4011 4007                                  MDI_CLIENT_LOCK(ct);
4012 4008                                  if ((rv != NDI_SUCCESS) &&
4013 4009                                      (MDI_CLIENT_STATE(ct) ==
4014 4010                                      MDI_CLIENT_STATE_DEGRADED)) {
4015 4011                                          MDI_CLIENT_SET_OFFLINE(ct);
4016 4012                                  }
4017 4013                                  if (rv != NDI_SUCCESS) {
4018 4014                                          /* Reset the path state */
4019 4015                                          MDI_PI_LOCK(pip);
4020 4016                                          MDI_PI(pip)->pi_state =
4021 4017                                              MDI_PI_OLD_STATE(pip);
4022 4018                                          MDI_PI_UNLOCK(pip);
4023 4019                                  }
4024 4020                          }
4025 4021                  }
4026 4022                  switch (rv) {
4027 4023                  case NDI_SUCCESS:
4028 4024                          MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4029 4025                          i_mdi_report_path_state(ct, pip);
4030 4026                          rv = MDI_SUCCESS;
4031 4027                          break;
4032 4028                  case NDI_BUSY:
4033 4029                          rv = MDI_BUSY;
4034 4030                          break;
4035 4031                  default:
4036 4032                          rv = MDI_FAILURE;
4037 4033                          break;
4038 4034                  }
4039 4035          } else {
4040 4036                  /* Reset the path state */
4041 4037                  MDI_PI_LOCK(pip);
4042 4038                  MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4043 4039                  MDI_PI_UNLOCK(pip);
4044 4040          }
4045 4041          MDI_CLIENT_UNLOCK(ct);
4046 4042          return (rv);
4047 4043  }
4048 4044  
4049 4045  /*
4050 4046   * mdi_pi_get_node_name():
4051 4047   *              Get the name associated with a mdi_pathinfo node.
4052 4048   *              Since pathinfo nodes are not directly named, we
4053 4049   *              return the node_name of the client.
4054 4050   *
4055 4051   * Return Values:
4056 4052   *              char *
4057 4053   */
4058 4054  char *
4059 4055  mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4060 4056  {
4061 4057          mdi_client_t    *ct;
4062 4058  
4063 4059          if (pip == NULL)
4064 4060                  return (NULL);
4065 4061          ct = MDI_PI(pip)->pi_client;
4066 4062          if ((ct == NULL) || (ct->ct_dip == NULL))
4067 4063                  return (NULL);
4068 4064          return (ddi_node_name(ct->ct_dip));
4069 4065  }
4070 4066  
4071 4067  /*
4072 4068   * mdi_pi_get_addr():
4073 4069   *              Get the unit address associated with a mdi_pathinfo node
4074 4070   *
4075 4071   * Return Values:
4076 4072   *              char *
4077 4073   */
4078 4074  char *
4079 4075  mdi_pi_get_addr(mdi_pathinfo_t *pip)
4080 4076  {
4081 4077          if (pip == NULL)
4082 4078                  return (NULL);
4083 4079  
4084 4080          return (MDI_PI(pip)->pi_addr);
4085 4081  }
4086 4082  
4087 4083  /*
4088 4084   * mdi_pi_get_path_instance():
4089 4085   *              Get the 'path_instance' of a mdi_pathinfo node
4090 4086   *
4091 4087   * Return Values:
4092 4088   *              path_instance
4093 4089   */
4094 4090  int
4095 4091  mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4096 4092  {
4097 4093          if (pip == NULL)
4098 4094                  return (0);
4099 4095  
4100 4096          return (MDI_PI(pip)->pi_path_instance);
4101 4097  }
4102 4098  
4103 4099  /*
4104 4100   * mdi_pi_pathname():
4105 4101   *              Return pointer to path to pathinfo node.
4106 4102   */
4107 4103  char *
4108 4104  mdi_pi_pathname(mdi_pathinfo_t *pip)
4109 4105  {
4110 4106          if (pip == NULL)
4111 4107                  return (NULL);
4112 4108          return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4113 4109  }
4114 4110  
4115 4111  /*
4116 4112   * mdi_pi_spathname():
4117 4113   *              Return pointer to shortpath to pathinfo node. Used for debug
4118 4114   *              messages, so return "" instead of NULL when unknown.
4119 4115   */
4120 4116  char *
4121 4117  mdi_pi_spathname(mdi_pathinfo_t *pip)
4122 4118  {
4123 4119          char    *spath = "";
4124 4120  
4125 4121          if (pip) {
4126 4122                  spath = mdi_pi_spathname_by_instance(
4127 4123                      mdi_pi_get_path_instance(pip));
4128 4124                  if (spath == NULL)
4129 4125                          spath = "";
4130 4126          }
4131 4127          return (spath);
4132 4128  }
4133 4129  
4134 4130  char *
4135 4131  mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4136 4132  {
4137 4133          char *obp_path = NULL;
4138 4134          if ((pip == NULL) || (path == NULL))
4139 4135                  return (NULL);
4140 4136  
4141 4137          if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4142 4138                  (void) strcpy(path, obp_path);
4143 4139                  (void) mdi_prop_free(obp_path);
4144 4140          } else {
4145 4141                  path = NULL;
4146 4142          }
4147 4143          return (path);
4148 4144  }
4149 4145  
4150 4146  int
4151 4147  mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4152 4148  {
4153 4149          dev_info_t *pdip;
4154 4150          char *obp_path = NULL;
4155 4151          int rc = MDI_FAILURE;
4156 4152  
4157 4153          if (pip == NULL)
4158 4154                  return (MDI_FAILURE);
4159 4155  
4160 4156          pdip = mdi_pi_get_phci(pip);
4161 4157          if (pdip == NULL)
4162 4158                  return (MDI_FAILURE);
4163 4159  
4164 4160          obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4165 4161  
4166 4162          if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4167 4163                  (void) ddi_pathname(pdip, obp_path);
4168 4164          }
4169 4165  
4170 4166          if (component) {
4171 4167                  (void) strncat(obp_path, "/", MAXPATHLEN);
4172 4168                  (void) strncat(obp_path, component, MAXPATHLEN);
4173 4169          }
4174 4170          rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4175 4171  
4176 4172          if (obp_path)
4177 4173                  kmem_free(obp_path, MAXPATHLEN);
4178 4174          return (rc);
4179 4175  }
4180 4176  
4181 4177  /*
4182 4178   * mdi_pi_get_client():
4183 4179   *              Get the client devinfo associated with a mdi_pathinfo node
4184 4180   *
4185 4181   * Return Values:
4186 4182   *              Handle to client device dev_info node
4187 4183   */
4188 4184  dev_info_t *
4189 4185  mdi_pi_get_client(mdi_pathinfo_t *pip)
4190 4186  {
4191 4187          dev_info_t      *dip = NULL;
4192 4188          if (pip) {
4193 4189                  dip = MDI_PI(pip)->pi_client->ct_dip;
4194 4190          }
4195 4191          return (dip);
4196 4192  }
4197 4193  
4198 4194  /*
4199 4195   * mdi_pi_get_phci():
4200 4196   *              Get the pHCI devinfo associated with the mdi_pathinfo node
4201 4197   * Return Values:
4202 4198   *              Handle to dev_info node
4203 4199   */
4204 4200  dev_info_t *
4205 4201  mdi_pi_get_phci(mdi_pathinfo_t *pip)
4206 4202  {
4207 4203          dev_info_t      *dip = NULL;
4208 4204          mdi_phci_t      *ph;
4209 4205  
4210 4206          if (pip) {
4211 4207                  ph = MDI_PI(pip)->pi_phci;
4212 4208                  if (ph)
4213 4209                          dip = ph->ph_dip;
4214 4210          }
4215 4211          return (dip);
4216 4212  }
4217 4213  
4218 4214  /*
4219 4215   * mdi_pi_get_client_private():
4220 4216   *              Get the client private information associated with the
4221 4217   *              mdi_pathinfo node
4222 4218   */
4223 4219  void *
4224 4220  mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4225 4221  {
4226 4222          void *cprivate = NULL;
4227 4223          if (pip) {
4228 4224                  cprivate = MDI_PI(pip)->pi_cprivate;
4229 4225          }
4230 4226          return (cprivate);
4231 4227  }
4232 4228  
4233 4229  /*
4234 4230   * mdi_pi_set_client_private():
4235 4231   *              Set the client private information in the mdi_pathinfo node
4236 4232   */
4237 4233  void
4238 4234  mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4239 4235  {
4240 4236          if (pip) {
4241 4237                  MDI_PI(pip)->pi_cprivate = priv;
4242 4238          }
4243 4239  }
4244 4240  
4245 4241  /*
4246 4242   * mdi_pi_get_phci_private():
4247 4243   *              Get the pHCI private information associated with the
4248 4244   *              mdi_pathinfo node
4249 4245   */
4250 4246  caddr_t
4251 4247  mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4252 4248  {
4253 4249          caddr_t pprivate = NULL;
4254 4250  
4255 4251          if (pip) {
4256 4252                  pprivate = MDI_PI(pip)->pi_pprivate;
4257 4253          }
4258 4254          return (pprivate);
4259 4255  }
4260 4256  
4261 4257  /*
4262 4258   * mdi_pi_set_phci_private():
4263 4259   *              Set the pHCI private information in the mdi_pathinfo node
4264 4260   */
4265 4261  void
4266 4262  mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4267 4263  {
4268 4264          if (pip) {
4269 4265                  MDI_PI(pip)->pi_pprivate = priv;
4270 4266          }
4271 4267  }
4272 4268  
4273 4269  /*
4274 4270   * mdi_pi_get_state():
4275 4271   *              Get the mdi_pathinfo node state. Transient states are internal
4276 4272   *              and not provided to the users
4277 4273   */
4278 4274  mdi_pathinfo_state_t
4279 4275  mdi_pi_get_state(mdi_pathinfo_t *pip)
4280 4276  {
4281 4277          mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4282 4278  
4283 4279          if (pip) {
4284 4280                  if (MDI_PI_IS_TRANSIENT(pip)) {
4285 4281                          /*
4286 4282                           * mdi_pathinfo is in state transition.  Return the
4287 4283                           * last good state.
4288 4284                           */
4289 4285                          state = MDI_PI_OLD_STATE(pip);
4290 4286                  } else {
4291 4287                          state = MDI_PI_STATE(pip);
4292 4288                  }
4293 4289          }
4294 4290          return (state);
4295 4291  }
4296 4292  
4297 4293  /*
4298 4294   * mdi_pi_get_flags():
4299 4295   *              Get the mdi_pathinfo node flags.
4300 4296   */
4301 4297  uint_t
4302 4298  mdi_pi_get_flags(mdi_pathinfo_t *pip)
4303 4299  {
4304 4300          return (pip ? MDI_PI(pip)->pi_flags : 0);
4305 4301  }
4306 4302  
4307 4303  /*
4308 4304   * Note that the following function needs to be the new interface for
4309 4305   * mdi_pi_get_state when mpxio gets integrated to ON.
4310 4306   */
4311 4307  int
4312 4308  mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4313 4309                  uint32_t *ext_state)
4314 4310  {
4315 4311          *state = MDI_PATHINFO_STATE_INIT;
4316 4312  
4317 4313          if (pip) {
4318 4314                  if (MDI_PI_IS_TRANSIENT(pip)) {
4319 4315                          /*
4320 4316                           * mdi_pathinfo is in state transition.  Return the
4321 4317                           * last good state.
4322 4318                           */
4323 4319                          *state = MDI_PI_OLD_STATE(pip);
4324 4320                          *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4325 4321                  } else {
4326 4322                          *state = MDI_PI_STATE(pip);
4327 4323                          *ext_state = MDI_PI_EXT_STATE(pip);
4328 4324                  }
4329 4325          }
4330 4326          return (MDI_SUCCESS);
4331 4327  }
4332 4328  
4333 4329  /*
4334 4330   * mdi_pi_get_preferred:
4335 4331   *      Get the preferred path flag
4336 4332   */
4337 4333  int
4338 4334  mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4339 4335  {
4340 4336          if (pip) {
4341 4337                  return (MDI_PI(pip)->pi_preferred);
4342 4338          }
4343 4339          return (0);
4344 4340  }
4345 4341  
4346 4342  /*
4347 4343   * mdi_pi_set_preferred:
4348 4344   *      Set the preferred path flag
4349 4345   */
4350 4346  void
4351 4347  mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4352 4348  {
4353 4349          if (pip) {
4354 4350                  MDI_PI(pip)->pi_preferred = preferred;
4355 4351          }
4356 4352  }
4357 4353  
4358 4354  /*
4359 4355   * mdi_pi_set_state():
4360 4356   *              Set the mdi_pathinfo node state
4361 4357   */
4362 4358  void
4363 4359  mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4364 4360  {
4365 4361          uint32_t        ext_state;
4366 4362  
4367 4363          if (pip) {
4368 4364                  ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4369 4365                  MDI_PI(pip)->pi_state = state;
4370 4366                  MDI_PI(pip)->pi_state |= ext_state;
4371 4367  
4372 4368                  /* Path has changed state, invalidate DINFOCACHE snap shot. */
4373 4369                  i_ddi_di_cache_invalidate();
4374 4370          }
4375 4371  }
4376 4372  
4377 4373  /*
4378 4374   * Property functions:
4379 4375   */
4380 4376  int
4381 4377  i_map_nvlist_error_to_mdi(int val)
4382 4378  {
4383 4379          int rv;
4384 4380  
4385 4381          switch (val) {
4386 4382          case 0:
4387 4383                  rv = DDI_PROP_SUCCESS;
4388 4384                  break;
4389 4385          case EINVAL:
4390 4386          case ENOTSUP:
4391 4387                  rv = DDI_PROP_INVAL_ARG;
4392 4388                  break;
4393 4389          case ENOMEM:
4394 4390                  rv = DDI_PROP_NO_MEMORY;
4395 4391                  break;
4396 4392          default:
4397 4393                  rv = DDI_PROP_NOT_FOUND;
4398 4394                  break;
4399 4395          }
4400 4396          return (rv);
4401 4397  }
4402 4398  
4403 4399  /*
4404 4400   * mdi_pi_get_next_prop():
4405 4401   *              Property walk function.  The caller should hold mdi_pi_lock()
4406 4402   *              and release by calling mdi_pi_unlock() at the end of walk to
4407 4403   *              get a consistent value.
4408 4404   */
4409 4405  nvpair_t *
4410 4406  mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4411 4407  {
4412 4408          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4413 4409                  return (NULL);
4414 4410          }
4415 4411          ASSERT(MDI_PI_LOCKED(pip));
4416 4412          return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4417 4413  }
4418 4414  
4419 4415  /*
4420 4416   * mdi_prop_remove():
4421 4417   *              Remove the named property from the named list.
4422 4418   */
4423 4419  int
4424 4420  mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4425 4421  {
4426 4422          if (pip == NULL) {
4427 4423                  return (DDI_PROP_NOT_FOUND);
4428 4424          }
4429 4425          ASSERT(!MDI_PI_LOCKED(pip));
4430 4426          MDI_PI_LOCK(pip);
4431 4427          if (MDI_PI(pip)->pi_prop == NULL) {
4432 4428                  MDI_PI_UNLOCK(pip);
4433 4429                  return (DDI_PROP_NOT_FOUND);
4434 4430          }
4435 4431          if (name) {
4436 4432                  (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4437 4433          } else {
4438 4434                  char            nvp_name[MAXNAMELEN];
4439 4435                  nvpair_t        *nvp;
4440 4436                  nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4441 4437                  while (nvp) {
4442 4438                          nvpair_t        *next;
4443 4439                          next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4444 4440                          (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4445 4441                              nvpair_name(nvp));
4446 4442                          (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4447 4443                              nvp_name);
4448 4444                          nvp = next;
4449 4445                  }
4450 4446          }
4451 4447          MDI_PI_UNLOCK(pip);
4452 4448          return (DDI_PROP_SUCCESS);
4453 4449  }
4454 4450  
4455 4451  /*
4456 4452   * mdi_prop_size():
4457 4453   *              Get buffer size needed to pack the property data.
4458 4454   *              Caller should hold the mdi_pathinfo_t lock to get a consistent
4459 4455   *              buffer size.
4460 4456   */
4461 4457  int
4462 4458  mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4463 4459  {
4464 4460          int     rv;
4465 4461          size_t  bufsize;
4466 4462  
4467 4463          *buflenp = 0;
4468 4464          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4469 4465                  return (DDI_PROP_NOT_FOUND);
4470 4466          }
4471 4467          ASSERT(MDI_PI_LOCKED(pip));
4472 4468          rv = nvlist_size(MDI_PI(pip)->pi_prop,
4473 4469              &bufsize, NV_ENCODE_NATIVE);
4474 4470          *buflenp = bufsize;
4475 4471          return (i_map_nvlist_error_to_mdi(rv));
4476 4472  }
4477 4473  
4478 4474  /*
4479 4475   * mdi_prop_pack():
4480 4476   *              Pack the property list.  The caller should hold the
4481 4477   *              mdi_pathinfo_t lock to get consistent data.
4482 4478   */
4483 4479  int
4484 4480  mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4485 4481  {
4486 4482          int     rv;
4487 4483          size_t  bufsize;
4488 4484  
4489 4485          if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4490 4486                  return (DDI_PROP_NOT_FOUND);
4491 4487          }
4492 4488  
4493 4489          ASSERT(MDI_PI_LOCKED(pip));
4494 4490  
4495 4491          bufsize = buflen;
4496 4492          rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4497 4493              NV_ENCODE_NATIVE, KM_SLEEP);
4498 4494  
4499 4495          return (i_map_nvlist_error_to_mdi(rv));
4500 4496  }
4501 4497  
4502 4498  /*
4503 4499   * mdi_prop_update_byte():
4504 4500   *              Create/Update a byte property
4505 4501   */
4506 4502  int
4507 4503  mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4508 4504  {
4509 4505          int rv;
4510 4506  
4511 4507          if (pip == NULL) {
4512 4508                  return (DDI_PROP_INVAL_ARG);
4513 4509          }
4514 4510          ASSERT(!MDI_PI_LOCKED(pip));
4515 4511          MDI_PI_LOCK(pip);
4516 4512          if (MDI_PI(pip)->pi_prop == NULL) {
4517 4513                  MDI_PI_UNLOCK(pip);
4518 4514                  return (DDI_PROP_NOT_FOUND);
4519 4515          }
4520 4516          rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4521 4517          MDI_PI_UNLOCK(pip);
4522 4518          return (i_map_nvlist_error_to_mdi(rv));
4523 4519  }
4524 4520  
4525 4521  /*
4526 4522   * mdi_prop_update_byte_array():
4527 4523   *              Create/Update a byte array property
4528 4524   */
4529 4525  int
4530 4526  mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4531 4527      uint_t nelements)
4532 4528  {
4533 4529          int rv;
4534 4530  
4535 4531          if (pip == NULL) {
4536 4532                  return (DDI_PROP_INVAL_ARG);
4537 4533          }
4538 4534          ASSERT(!MDI_PI_LOCKED(pip));
4539 4535          MDI_PI_LOCK(pip);
4540 4536          if (MDI_PI(pip)->pi_prop == NULL) {
4541 4537                  MDI_PI_UNLOCK(pip);
4542 4538                  return (DDI_PROP_NOT_FOUND);
4543 4539          }
4544 4540          rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4545 4541          MDI_PI_UNLOCK(pip);
4546 4542          return (i_map_nvlist_error_to_mdi(rv));
4547 4543  }
4548 4544  
4549 4545  /*
4550 4546   * mdi_prop_update_int():
4551 4547   *              Create/Update a 32 bit integer property
4552 4548   */
4553 4549  int
4554 4550  mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4555 4551  {
4556 4552          int rv;
4557 4553  
4558 4554          if (pip == NULL) {
4559 4555                  return (DDI_PROP_INVAL_ARG);
4560 4556          }
4561 4557          ASSERT(!MDI_PI_LOCKED(pip));
4562 4558          MDI_PI_LOCK(pip);
4563 4559          if (MDI_PI(pip)->pi_prop == NULL) {
4564 4560                  MDI_PI_UNLOCK(pip);
4565 4561                  return (DDI_PROP_NOT_FOUND);
4566 4562          }
4567 4563          rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4568 4564          MDI_PI_UNLOCK(pip);
4569 4565          return (i_map_nvlist_error_to_mdi(rv));
4570 4566  }
4571 4567  
4572 4568  /*
4573 4569   * mdi_prop_update_int64():
4574 4570   *              Create/Update a 64 bit integer property
4575 4571   */
4576 4572  int
4577 4573  mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4578 4574  {
4579 4575          int rv;
4580 4576  
4581 4577          if (pip == NULL) {
4582 4578                  return (DDI_PROP_INVAL_ARG);
4583 4579          }
4584 4580          ASSERT(!MDI_PI_LOCKED(pip));
4585 4581          MDI_PI_LOCK(pip);
4586 4582          if (MDI_PI(pip)->pi_prop == NULL) {
4587 4583                  MDI_PI_UNLOCK(pip);
4588 4584                  return (DDI_PROP_NOT_FOUND);
4589 4585          }
4590 4586          rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4591 4587          MDI_PI_UNLOCK(pip);
4592 4588          return (i_map_nvlist_error_to_mdi(rv));
4593 4589  }
4594 4590  
4595 4591  /*
4596 4592   * mdi_prop_update_int_array():
4597 4593   *              Create/Update an int array property
4598 4594   */
4599 4595  int
4600 4596  mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4601 4597              uint_t nelements)
4602 4598  {
4603 4599          int rv;
4604 4600  
4605 4601          if (pip == NULL) {
4606 4602                  return (DDI_PROP_INVAL_ARG);
4607 4603          }
4608 4604          ASSERT(!MDI_PI_LOCKED(pip));
4609 4605          MDI_PI_LOCK(pip);
4610 4606          if (MDI_PI(pip)->pi_prop == NULL) {
4611 4607                  MDI_PI_UNLOCK(pip);
4612 4608                  return (DDI_PROP_NOT_FOUND);
4613 4609          }
4614 4610          rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4615 4611              nelements);
4616 4612          MDI_PI_UNLOCK(pip);
4617 4613          return (i_map_nvlist_error_to_mdi(rv));
4618 4614  }
4619 4615  
4620 4616  /*
4621 4617   * mdi_prop_update_string():
4622 4618   *              Create/Update a string property
4623 4619   */
4624 4620  int
4625 4621  mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4626 4622  {
4627 4623          int rv;
4628 4624  
4629 4625          if (pip == NULL) {
4630 4626                  return (DDI_PROP_INVAL_ARG);
4631 4627          }
4632 4628          ASSERT(!MDI_PI_LOCKED(pip));
4633 4629          MDI_PI_LOCK(pip);
4634 4630          if (MDI_PI(pip)->pi_prop == NULL) {
4635 4631                  MDI_PI_UNLOCK(pip);
4636 4632                  return (DDI_PROP_NOT_FOUND);
4637 4633          }
4638 4634          rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4639 4635          MDI_PI_UNLOCK(pip);
4640 4636          return (i_map_nvlist_error_to_mdi(rv));
4641 4637  }
4642 4638  
4643 4639  /*
4644 4640   * mdi_prop_update_string_array():
4645 4641   *              Create/Update a string array property
4646 4642   */
4647 4643  int
4648 4644  mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4649 4645      uint_t nelements)
4650 4646  {
4651 4647          int rv;
4652 4648  
4653 4649          if (pip == NULL) {
4654 4650                  return (DDI_PROP_INVAL_ARG);
4655 4651          }
4656 4652          ASSERT(!MDI_PI_LOCKED(pip));
4657 4653          MDI_PI_LOCK(pip);
4658 4654          if (MDI_PI(pip)->pi_prop == NULL) {
4659 4655                  MDI_PI_UNLOCK(pip);
4660 4656                  return (DDI_PROP_NOT_FOUND);
4661 4657          }
4662 4658          rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4663 4659              nelements);
4664 4660          MDI_PI_UNLOCK(pip);
4665 4661          return (i_map_nvlist_error_to_mdi(rv));
4666 4662  }
4667 4663  
4668 4664  /*
4669 4665   * mdi_prop_lookup_byte():
4670 4666   *              Look for byte property identified by name.  The data returned
4671 4667   *              is the actual property and valid as long as mdi_pathinfo_t node
4672 4668   *              is alive.
4673 4669   */
4674 4670  int
4675 4671  mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4676 4672  {
4677 4673          int rv;
4678 4674  
4679 4675          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4680 4676                  return (DDI_PROP_NOT_FOUND);
4681 4677          }
4682 4678          rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4683 4679          return (i_map_nvlist_error_to_mdi(rv));
4684 4680  }
4685 4681  
4686 4682  
4687 4683  /*
4688 4684   * mdi_prop_lookup_byte_array():
4689 4685   *              Look for byte array property identified by name.  The data
4690 4686   *              returned is the actual property and valid as long as
4691 4687   *              mdi_pathinfo_t node is alive.
4692 4688   */
4693 4689  int
4694 4690  mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4695 4691      uint_t *nelements)
4696 4692  {
4697 4693          int rv;
4698 4694  
4699 4695          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4700 4696                  return (DDI_PROP_NOT_FOUND);
4701 4697          }
4702 4698          rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4703 4699              nelements);
4704 4700          return (i_map_nvlist_error_to_mdi(rv));
4705 4701  }
4706 4702  
4707 4703  /*
4708 4704   * mdi_prop_lookup_int():
4709 4705   *              Look for int property identified by name.  The data returned
4710 4706   *              is the actual property and valid as long as mdi_pathinfo_t
4711 4707   *              node is alive.
4712 4708   */
4713 4709  int
4714 4710  mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4715 4711  {
4716 4712          int rv;
4717 4713  
4718 4714          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4719 4715                  return (DDI_PROP_NOT_FOUND);
4720 4716          }
4721 4717          rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4722 4718          return (i_map_nvlist_error_to_mdi(rv));
4723 4719  }
4724 4720  
4725 4721  /*
4726 4722   * mdi_prop_lookup_int64():
4727 4723   *              Look for int64 property identified by name.  The data returned
4728 4724   *              is the actual property and valid as long as mdi_pathinfo_t node
4729 4725   *              is alive.
4730 4726   */
4731 4727  int
4732 4728  mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4733 4729  {
4734 4730          int rv;
4735 4731          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4736 4732                  return (DDI_PROP_NOT_FOUND);
4737 4733          }
4738 4734          rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4739 4735          return (i_map_nvlist_error_to_mdi(rv));
4740 4736  }
4741 4737  
4742 4738  /*
4743 4739   * mdi_prop_lookup_int_array():
4744 4740   *              Look for int array property identified by name.  The data
4745 4741   *              returned is the actual property and valid as long as
4746 4742   *              mdi_pathinfo_t node is alive.
4747 4743   */
4748 4744  int
4749 4745  mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4750 4746      uint_t *nelements)
4751 4747  {
4752 4748          int rv;
4753 4749  
4754 4750          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4755 4751                  return (DDI_PROP_NOT_FOUND);
4756 4752          }
4757 4753          rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4758 4754              (int32_t **)data, nelements);
4759 4755          return (i_map_nvlist_error_to_mdi(rv));
4760 4756  }
4761 4757  
4762 4758  /*
4763 4759   * mdi_prop_lookup_string():
4764 4760   *              Look for string property identified by name.  The data
4765 4761   *              returned is the actual property and valid as long as
4766 4762   *              mdi_pathinfo_t node is alive.
4767 4763   */
4768 4764  int
4769 4765  mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4770 4766  {
4771 4767          int rv;
4772 4768  
4773 4769          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4774 4770                  return (DDI_PROP_NOT_FOUND);
4775 4771          }
4776 4772          rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4777 4773          return (i_map_nvlist_error_to_mdi(rv));
4778 4774  }
4779 4775  
4780 4776  /*
4781 4777   * mdi_prop_lookup_string_array():
4782 4778   *              Look for string array property identified by name.  The data
4783 4779   *              returned is the actual property and valid as long as
4784 4780   *              mdi_pathinfo_t node is alive.
4785 4781   */
4786 4782  int
4787 4783  mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4788 4784      uint_t *nelements)
4789 4785  {
4790 4786          int rv;
4791 4787  
4792 4788          if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4793 4789                  return (DDI_PROP_NOT_FOUND);
4794 4790          }
4795 4791          rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4796 4792              nelements);
4797 4793          return (i_map_nvlist_error_to_mdi(rv));
4798 4794  }
4799 4795  
4800 4796  /*
4801 4797   * mdi_prop_free():
4802 4798   *              Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4803 4799   *              functions return the pointer to actual property data and not a
4804 4800   *              copy of it.  So the data returned is valid as long as
4805 4801   *              mdi_pathinfo_t node is valid.
4806 4802   */
4807 4803  /*ARGSUSED*/
4808 4804  int
4809 4805  mdi_prop_free(void *data)
4810 4806  {
4811 4807          return (DDI_PROP_SUCCESS);
4812 4808  }
4813 4809  
4814 4810  /*ARGSUSED*/
4815 4811  static void
4816 4812  i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4817 4813  {
4818 4814          char            *ct_path;
4819 4815          char            *ct_status;
4820 4816          char            *status;
4821 4817          dev_info_t      *cdip = ct->ct_dip;
4822 4818          char            lb_buf[64];
4823 4819          int             report_lb_c = 0, report_lb_p = 0;
4824 4820  
4825 4821          ASSERT(MDI_CLIENT_LOCKED(ct));
4826 4822          if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4827 4823              (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4828 4824                  return;
4829 4825          }
4830 4826          if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4831 4827                  ct_status = "optimal";
4832 4828                  report_lb_c = 1;
4833 4829          } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4834 4830                  ct_status = "degraded";
4835 4831          } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4836 4832                  ct_status = "failed";
4837 4833          } else {
4838 4834                  ct_status = "unknown";
4839 4835          }
4840 4836  
4841 4837          lb_buf[0] = 0;          /* not interested in load balancing config */
4842 4838  
4843 4839          if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4844 4840                  status = "removed";
4845 4841          } else if (MDI_PI_IS_OFFLINE(pip)) {
4846 4842                  status = "offline";
4847 4843          } else if (MDI_PI_IS_ONLINE(pip)) {
  
    | 
      ↓ open down ↓ | 
    1145 lines elided | 
    
      ↑ open up ↑ | 
  
4848 4844                  status = "online";
4849 4845                  report_lb_p = 1;
4850 4846          } else if (MDI_PI_IS_STANDBY(pip)) {
4851 4847                  status = "standby";
4852 4848          } else if (MDI_PI_IS_FAULT(pip)) {
4853 4849                  status = "faulted";
4854 4850          } else {
4855 4851                  status = "unknown";
4856 4852          }
4857 4853  
4858      -        if (cdip) {
     4854 +        if (cdip != NULL) {
4859 4855                  ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4860 4856  
4861      -                /*
4862      -                 * NOTE: Keeping "multipath status: %s" and
4863      -                 * "Load balancing: %s" format unchanged in case someone
4864      -                 * scrubs /var/adm/messages looking for these messages.
4865      -                 */
4866 4857                  if (report_lb_c && report_lb_p) {
4867 4858                          if (ct->ct_lb == LOAD_BALANCE_LBA) {
4868 4859                                  (void) snprintf(lb_buf, sizeof (lb_buf),
4869 4860                                      "%s, region-size: %d", mdi_load_balance_lba,
4870 4861                                      ct->ct_lb_args->region_size);
4871 4862                          } else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4872 4863                                  (void) snprintf(lb_buf, sizeof (lb_buf),
4873 4864                                      "%s", mdi_load_balance_none);
4874 4865                          } else {
4875 4866                                  (void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4876 4867                                      mdi_load_balance_rr);
4877 4868                          }
4878 4869  
4879      -                        cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4880      -                            "?%s (%s%d) multipath status: %s: "
4881      -                            "path %d %s is %s: Load balancing: %s\n",
4882      -                            ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4883      -                            ddi_get_instance(cdip), ct_status,
4884      -                            mdi_pi_get_path_instance(pip),
     4870 +                        dev_err(cdip, CE_CONT, "!multipath status: %s: "
     4871 +                            "path %d %s is %s; load balancing: %s\n",
     4872 +                            ct_status, mdi_pi_get_path_instance(pip),
4885 4873                              mdi_pi_spathname(pip), status, lb_buf);
4886 4874                  } else {
4887      -                        cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4888      -                            "?%s (%s%d) multipath status: %s: "
4889      -                            "path %d %s is %s\n",
4890      -                            ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4891      -                            ddi_get_instance(cdip), ct_status,
4892      -                            mdi_pi_get_path_instance(pip),
     4875 +                        dev_err(cdip, CE_CONT,
     4876 +                            "!multipath status: %s: path %d %s is %s\n",
     4877 +                            ct_status, mdi_pi_get_path_instance(pip),
4893 4878                              mdi_pi_spathname(pip), status);
4894 4879                  }
4895 4880  
4896 4881                  kmem_free(ct_path, MAXPATHLEN);
4897 4882                  MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4898 4883          }
4899 4884  }
4900 4885  
4901 4886  #ifdef  DEBUG
4902 4887  /*
4903 4888   * i_mdi_log():
4904 4889   *              Utility function for error message management
4905 4890   *
            *              fmt and its arguments are formatted into a 512-byte local
            *              buffer and passed to cmn_err() behind an "mdi: " prefix.
            *              When dip is non-NULL the prefix is followed by
            *              "<driver name><instance>: "; func is printed next for
            *              CE_NOTE, CE_CONT, CE_WARN and CE_PANIC (it is omitted for
            *              any other level).
            *
            *              A leading '!', '?' or '^' in the formatted message is
            *              stripped and re-applied in front of the "mdi: " prefix.
            *              mdi_debug_logonly forces '!'. mdi_debug_consoleonly is
            *              tested afterwards, so it wins: it forces '^' and CE_NOTE.
            *              Without mdi_debug_consoleonly, CE_NOTE is issued as CE_CONT.
            *
4906 4891   *              NOTE: Implementation takes care of trailing \n for cmn_err,
4907 4892   *              MDI_DEBUG should not terminate fmt strings with \n.
4908 4893   *
4909 4894   *              NOTE: If the level is >= 2, and there is no leading !?^
4910 4895   *              then a leading ! is implied (but can be overridden via
4911 4896   *              mdi_debug_consoleonly). If you are using kmdb on the console,
4912 4897   *              consider setting mdi_debug_consoleonly to 1 as an aid.
4913 4898   */
4914 4899  /*PRINTFLIKE4*/
4915 4900  static void
4916 4901  i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4917 4902  {
4918 4903          char            name[MAXNAMELEN];
4919 4904          char            buf[512];
4920 4905          char            *bp;
4921 4906          va_list         ap;
4922 4907          int             log_only = 0;
4923 4908          int             boot_only = 0;
4924 4909          int             console_only = 0;
4925 4910  
                   /* Build the optional "<driver><instance>: " prefix. */
4926 4911          if (dip) {
4927 4912                  (void) snprintf(name, sizeof(name), "%s%d: ",
4928 4913                      ddi_driver_name(dip), ddi_get_instance(dip));
4929 4914          } else {
4930 4915                  name[0] = 0;
4931 4916          }
4932 4917  
4933 4918          va_start(ap, fmt);
4934 4919          (void) vsnprintf(buf, sizeof(buf), fmt, ap);
4935 4920          va_end(ap);
4936 4921  
                   /*
                    * Peel off a leading destination character; bp points at the
                    * text that will actually be printed.
                    */
4937 4922          switch (buf[0]) {
4938 4923          case '!':
4939 4924                  bp = &buf[1];
4940 4925                  log_only = 1;
4941 4926                  break;
4942 4927          case '?':
4943 4928                  bp = &buf[1];
4944 4929                  boot_only = 1;
4945 4930                  break;
4946 4931          case '^':
4947 4932                  bp = &buf[1];
4948 4933                  console_only = 1;
4949 4934                  break;
4950 4935          default:
4951 4936                  if (level >= 2)
4952 4937                          log_only = 1;           /* ! implied */
4953 4938                  bp = buf;
4954 4939                  break;
4955 4940          }
                   /* Global overrides; consoleonly is tested last and so wins. */
4956 4941          if (mdi_debug_logonly) {
4957 4942                  log_only = 1;
4958 4943                  boot_only = 0;
4959 4944                  console_only = 0;
4960 4945          }
4961 4946          if (mdi_debug_consoleonly) {
4962 4947                  log_only = 0;
4963 4948                  boot_only = 0;
4964 4949                  console_only = 1;
4965 4950                  level = CE_NOTE;
                           /*
                            * Jump straight into the CE_WARN/CE_PANIC arm below so
                            * that CE_NOTE is not rewritten to CE_CONT.
                            */
4966 4951                  goto console;
4967 4952          }
4968 4953  
4969 4954          switch (level) {
4970 4955          case CE_NOTE:
4971 4956                  level = CE_CONT;
4972 4957                  /* FALLTHROUGH */
4973 4958          case CE_CONT:
                           /* This arm appends the trailing \n itself. */
4974 4959                  if (boot_only) {
4975 4960                          cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4976 4961                  } else if (console_only) {
4977 4962                          cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4978 4963                  } else if (log_only) {
4979 4964                          cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4980 4965                  } else {
4981 4966                          cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4982 4967                  }
4983 4968                  break;
4984 4969  
4985 4970          case CE_WARN:
4986 4971          case CE_PANIC:
4987 4972          console:
                           /* Same messages as above, but no \n is appended here. */
4988 4973                  if (boot_only) {
4989 4974                          cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
4990 4975                  } else if (console_only) {
4991 4976                          cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
4992 4977                  } else if (log_only) {
4993 4978                          cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
4994 4979                  } else {
4995 4980                          cmn_err(level, "mdi: %s%s: %s", name, func, bp);
4996 4981                  }
4997 4982                  break;
4998 4983          default:
                           /* Any other level: func and the !?^ prefix are dropped. */
4999 4984                  cmn_err(level, "mdi: %s%s", name, bp);
5000 4985                  break;
5001 4986          }
5002 4987  }
5003 4988  #endif  /* DEBUG */
5004 4989  
5005 4990  void
5006 4991  i_mdi_client_online(dev_info_t *ct_dip)
5007 4992  {
5008 4993          mdi_client_t    *ct;
5009 4994  
5010 4995          /*
5011 4996           * Client online notification. Mark the client state as online
5012 4997           * and restore our binding with the dev_info node.
                    *
                    * ct_dip must already have an MDI client attached (asserted
                    * below). Everything from the state change to the PM hold is
                    * done with the client lock held.
5013 4998           */
5014 4999          ct = i_devi_get_client(ct_dip);
5015 5000          ASSERT(ct != NULL);
5016 5001          MDI_CLIENT_LOCK(ct);
5017 5002          MDI_CLIENT_SET_ONLINE(ct);
5018 5003          /* catch for any memory leaks */
5019 5004          ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5020 5005          ct->ct_dip = ct_dip;
5021 5006  
                   /*
                    * NOTE(review): ct_power_cnt == 0 presumably means nothing has
                    * powered the pHCIs for this client yet - confirm. The result
                    * of i_mdi_power_all_phci() is deliberately ignored.
                    */
5022 5007          if (ct->ct_power_cnt == 0)
5023 5008                  (void) i_mdi_power_all_phci(ct);
5024 5009  
5025 5010          MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5026 5011              "i_mdi_pm_hold_client %p", (void *)ct));
5027 5012          i_mdi_pm_hold_client(ct, 1);
5028 5013  
5029 5014          MDI_CLIENT_UNLOCK(ct);
5030 5015  }
5031 5016  
5032 5017  void
5033 5018  i_mdi_phci_online(dev_info_t *ph_dip)
5034 5019  {
5035 5020          mdi_phci_t      *ph;
5036 5021  
5037 5022          /*
                    * pHCI online notification. Mark state accordingly.
                    *
                    * ph_dip must already have an MDI pHCI attached (asserted
                    * below); the state change is made under the pHCI lock.
                    */
5038 5023          ph = i_devi_get_phci(ph_dip);
5039 5024          ASSERT(ph != NULL);
5040 5025          MDI_PHCI_LOCK(ph);
5041 5026          MDI_PHCI_SET_ONLINE(ph);
5042 5027          MDI_PHCI_UNLOCK(ph);
5043 5028  }
5044 5029  
5045 5030  /*
5046 5031   * mdi_devi_online():
5047 5032   *              Online notification from NDI framework on pHCI/client
5048 5033   *              device online.
            *
            *              A dip may be both a pHCI and a client; each role is
            *              notified independently. flags is not used.
5049 5034   * Return Values:
5050 5035   *              NDI_SUCCESS
5051 5036   *              (always; this function has no failure path)
5052 5037   */
5053 5038  /*ARGSUSED*/
5054 5039  int
5055 5040  mdi_devi_online(dev_info_t *dip, uint_t flags)
5056 5041  {
5057 5042          if (MDI_PHCI(dip)) {
5058 5043                  i_mdi_phci_online(dip);
5059 5044          }
5060 5045  
5061 5046          if (MDI_CLIENT(dip)) {
5062 5047                  i_mdi_client_online(dip);
5063 5048          }
5064 5049          return (NDI_SUCCESS);
5065 5050  }
5066 5051  
5067 5052  /*
5068 5053   * mdi_devi_offline():
5069 5054   *              Offline notification from NDI framework on pHCI/Client device
5070 5055   *              offline.
            *
            *              The client role (if any) is offlined first; if that fails
            *              the pHCI role is not attempted. If the pHCI offline then
            *              fails on a dip that is also a client, the client is put
            *              back online.
5071 5056   *
5072 5057   * Return Values:
5073 5058   *              NDI_SUCCESS
5074 5059   *              otherwise the value returned by i_mdi_client_offline() or
            *              i_mdi_phci_offline() (e.g. NDI_BUSY)
5075 5060   */
5076 5061  /*ARGSUSED*/
5077 5062  int
5078 5063  mdi_devi_offline(dev_info_t *dip, uint_t flags)
5079 5064  {
5080 5065          int             rv = NDI_SUCCESS;
5081 5066  
5082 5067          if (MDI_CLIENT(dip)) {
5083 5068                  rv = i_mdi_client_offline(dip, flags);
5084 5069                  if (rv != NDI_SUCCESS)
5085 5070                          return (rv);
5086 5071          }
5087 5072  
5088 5073          if (MDI_PHCI(dip)) {
5089 5074                  rv = i_mdi_phci_offline(dip, flags);
5090 5075  
5091 5076                  if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5092 5077                          /* set client back online */
5093 5078                          i_mdi_client_online(dip);
5094 5079                  }
5095 5080          }
5096 5081  
5097 5082          return (rv);
5098 5083  }
5099 5084  
5100 5085  /*ARGSUSED*/
5101 5086  static int
5102 5087  i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
5103 5088  {
5104 5089          int             rv = NDI_SUCCESS;
5105 5090          mdi_phci_t      *ph;
5106 5091          mdi_client_t    *ct;
5107 5092          mdi_pathinfo_t  *pip;
5108 5093          mdi_pathinfo_t  *next;
5109 5094          mdi_pathinfo_t  *failed_pip = NULL;
5110 5095          dev_info_t      *cdip;
5111 5096  
5112 5097          /*
5113 5098           * pHCI component offline notification
5114 5099           * Make sure that this pHCI instance is free to be offlined.
5115 5100           * If it is OK to proceed, Offline and remove all the child
5116 5101           * mdi_pathinfo nodes.  This process automatically offlines
5117 5102           * corresponding client devices, for which this pHCI provides
5118 5103           * critical services.
                    *
                    * Returns NDI_SUCCESS when dip has no pHCI, when the pHCI is
                    * already offline, or when every path ended up offline.
                    * Returns NDI_BUSY when the pHCI or one of its clients is
                    * unstable or failing over, when a last-path client could not
                    * be offlined, or when a path is still not offline after
                    * i_mdi_pi_offline().
5119 5104           */
5120 5105          ph = i_devi_get_phci(dip);
5121 5106          MDI_DEBUG(2, (MDI_NOTE, dip,
5122 5107              "called %p %p", (void *)dip, (void *)ph));
5123 5108          if (ph == NULL) {
5124 5109                  return (rv);
5125 5110          }
5126 5111  
5127 5112          MDI_PHCI_LOCK(ph);
5128 5113  
5129 5114          if (MDI_PHCI_IS_OFFLINE(ph)) {
5130 5115                  MDI_DEBUG(1, (MDI_WARN, dip,
5131 5116                      "!pHCI already offlined: %p", (void *)dip));
5132 5117                  MDI_PHCI_UNLOCK(ph);
5133 5118                  return (NDI_SUCCESS);
5134 5119          }
5135 5120  
5136 5121          /*
5137 5122           * Check to see if the pHCI can be offlined
5138 5123           */
5139 5124          if (ph->ph_unstable) {
5140 5125                  MDI_DEBUG(1, (MDI_WARN, dip,
5141 5126                      "!One or more target devices are in transient state. "
5142 5127                      "This device can not be removed at this moment. "
5143 5128                      "Please try again later."));
5144 5129                  MDI_PHCI_UNLOCK(ph);
5145 5130                  return (NDI_BUSY);
5146 5131          }
5147 5132  
                   /*
                    * First pass: walk every path on this pHCI and offline each
                    * client for which this pHCI is the last path. The next
                    * pointer is captured under the path lock before any lock is
                    * dropped.
                    */
5148 5133          pip = ph->ph_path_head;
5149 5134          while (pip != NULL) {
5150 5135                  MDI_PI_LOCK(pip);
5151 5136                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5152 5137  
5153 5138                  /*
5154 5139                   * The mdi_pathinfo state is OK. Check the client state.
5155 5140                   * If failover in progress fail the pHCI from offlining
5156 5141                   */
5157 5142                  ct = MDI_PI(pip)->pi_client;
5158 5143                  i_mdi_client_lock(ct, pip);
5159 5144                  if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5160 5145                      (ct->ct_unstable)) {
5161 5146                          /*
5162 5147                           * Failover is in progress, Fail the DR
5163 5148                           */
5164 5149                          MDI_DEBUG(1, (MDI_WARN, dip,
5165 5150                              "!pHCI device is busy. "
5166 5151                              "This device can not be removed at this moment. "
5167 5152                              "Please try again later."));
5168 5153                          MDI_PI_UNLOCK(pip);
5169 5154                          i_mdi_client_unlock(ct);
5170 5155                          MDI_PHCI_UNLOCK(ph);
5171 5156                          return (NDI_BUSY);
5172 5157                  }
5173 5158                  MDI_PI_UNLOCK(pip);
5174 5159  
5175 5160                  /*
5176 5161                   * Check to see if we are removing the last path of this
5177 5162                   * client device...
5178 5163                   */
5179 5164                  cdip = ct->ct_dip;
5180 5165                  if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5181 5166                      (i_mdi_client_compute_state(ct, ph) ==
5182 5167                      MDI_CLIENT_STATE_FAILED)) {
                                   /*
                                    * Both the client and the pHCI lock are dropped
                                    * across ndi_devi_offline(); the pHCI lock is
                                    * re-taken on either outcome.
                                    */
5183 5168                          i_mdi_client_unlock(ct);
5184 5169                          MDI_PHCI_UNLOCK(ph);
5185 5170                          if (ndi_devi_offline(cdip,
5186 5171                              NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5187 5172                                  /*
5188 5173                                   * ndi_devi_offline() failed.
5189 5174                                   * This pHCI provides the critical path
5190 5175                                   * to one or more client devices.
5191 5176                                   * Return busy.
5192 5177                                   */
5193 5178                                  MDI_PHCI_LOCK(ph);
5194 5179                                  MDI_DEBUG(1, (MDI_WARN, dip,
5195 5180                                      "!pHCI device is busy. "
5196 5181                                      "This device can not be removed at this "
5197 5182                                      "moment. Please try again later."));
5198 5183                                  failed_pip = pip;
5199 5184                                  break;
5200 5185                          } else {
5201 5186                                  MDI_PHCI_LOCK(ph);
5202 5187                                  pip = next;
5203 5188                          }
5204 5189                  } else {
5205 5190                          i_mdi_client_unlock(ct);
5206 5191                          pip = next;
5207 5192                  }
5208 5193          }
5209 5194  
                   /*
                    * A client offline failed part-way through the walk. Revisit
                    * every path that precedes failed_pip and bring its client's
                    * devinfo node in line with the client's current state:
                    * OPTIMAL/DEGRADED clients are onlined again, FAILED clients
                    * are offlined. The results of both NDI calls are ignored,
                    * and the pHCI lock is dropped around each of them.
                    */
5210 5195          if (failed_pip) {
5211 5196                  pip = ph->ph_path_head;
5212 5197                  while (pip != failed_pip) {
5213 5198                          MDI_PI_LOCK(pip);
5214 5199                          next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5215 5200                          ct = MDI_PI(pip)->pi_client;
5216 5201                          i_mdi_client_lock(ct, pip);
5217 5202                          cdip = ct->ct_dip;
5218 5203                          switch (MDI_CLIENT_STATE(ct)) {
5219 5204                          case MDI_CLIENT_STATE_OPTIMAL:
5220 5205                          case MDI_CLIENT_STATE_DEGRADED:
5221 5206                                  if (cdip) {
5222 5207                                          MDI_PI_UNLOCK(pip);
5223 5208                                          i_mdi_client_unlock(ct);
5224 5209                                          MDI_PHCI_UNLOCK(ph);
5225 5210                                          (void) ndi_devi_online(cdip, 0);
5226 5211                                          MDI_PHCI_LOCK(ph);
5227 5212                                          pip = next;
5228 5213                                          continue;
5229 5214                                  }
5230 5215                                  break;
5231 5216  
5232 5217                          case MDI_CLIENT_STATE_FAILED:
5233 5218                                  if (cdip) {
5234 5219                                          MDI_PI_UNLOCK(pip);
5235 5220                                          i_mdi_client_unlock(ct);
5236 5221                                          MDI_PHCI_UNLOCK(ph);
5237 5222                                          (void) ndi_devi_offline(cdip,
5238 5223                                                  NDI_DEVFS_CLEAN);
5239 5224                                          MDI_PHCI_LOCK(ph);
5240 5225                                          pip = next;
5241 5226                                          continue;
5242 5227                                  }
5243 5228                                  break;
5244 5229                          }
                                   /* No devinfo node, or a state not handled above. */
5245 5230                          MDI_PI_UNLOCK(pip);
5246 5231                          i_mdi_client_unlock(ct);
5247 5232                          pip = next;
5248 5233                  }
5249 5234                  MDI_PHCI_UNLOCK(ph);
5250 5235                  return (NDI_BUSY);
5251 5236          }
5252 5237  
5253 5238          /*
5254 5239           * Mark the pHCI as offline
5255 5240           */
5256 5241          MDI_PHCI_SET_OFFLINE(ph);
5257 5242  
5258 5243          /*
5259 5244           * Mark the child mdi_pathinfo nodes as transient
5260 5245           */
5261 5246          pip = ph->ph_path_head;
5262 5247          while (pip != NULL) {
5263 5248                  MDI_PI_LOCK(pip);
5264 5249                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5265 5250                  MDI_PI_SET_OFFLINING(pip);
5266 5251                  MDI_PI_UNLOCK(pip);
5267 5252                  pip = next;
5268 5253          }
5269 5254          MDI_PHCI_UNLOCK(ph);
5270 5255          /*
5271 5256           * Give a chance for any pending commands to execute
5272 5257           */
5273 5258          delay_random(mdi_delay);
5274 5259          MDI_PHCI_LOCK(ph);
                   /*
                    * Final pass: offline each path. A path that is still not
                    * offline afterwards aborts the operation: the pHCI is marked
                    * online again and NDI_BUSY is returned. Paths handled earlier
                    * in this pass are not reverted here.
                    */
5275 5260          pip = ph->ph_path_head;
5276 5261          while (pip != NULL) {
5277 5262                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5278 5263                  (void) i_mdi_pi_offline(pip, flags);
5279 5264                  MDI_PI_LOCK(pip);
5280 5265                  ct = MDI_PI(pip)->pi_client;
5281 5266                  if (!MDI_PI_IS_OFFLINE(pip)) {
5282 5267                          MDI_DEBUG(1, (MDI_WARN, dip,
5283 5268                              "!pHCI device is busy. "
5284 5269                              "This device can not be removed at this moment. "
5285 5270                              "Please try again later."));
5286 5271                          MDI_PI_UNLOCK(pip);
5287 5272                          MDI_PHCI_SET_ONLINE(ph);
5288 5273                          MDI_PHCI_UNLOCK(ph);
5289 5274                          return (NDI_BUSY);
5290 5275                  }
5291 5276                  MDI_PI_UNLOCK(pip);
5292 5277                  pip = next;
5293 5278          }
5294 5279          MDI_PHCI_UNLOCK(ph);
5295 5280  
5296 5281          return (rv);
5297 5282  }
5298 5283  
5299 5284  void
5300 5285  mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5301 5286  {
5302 5287          mdi_phci_t      *ph;
5303 5288          mdi_client_t    *ct;
5304 5289          mdi_pathinfo_t  *pip;
5305 5290          mdi_pathinfo_t  *next;
5306 5291          dev_info_t      *cdip;
5307 5292  
                   /*
                    * For every client that would be left without a usable path
                    * if this pHCI went away, mark the client's devinfo node as
                    * retiring via e_ddi_mark_retiring(), passing cons_array
                    * through unchanged. Nothing is done when dip is not a pHCI,
                    * has no pHCI structure, or the pHCI is already offline.
                    */
5308 5293          if (!MDI_PHCI(dip))
5309 5294                  return;
5310 5295  
5311 5296          ph = i_devi_get_phci(dip);
5312 5297          if (ph == NULL) {
5313 5298                  return;
5314 5299          }
5315 5300  
5316 5301          MDI_PHCI_LOCK(ph);
5317 5302  
5318 5303          if (MDI_PHCI_IS_OFFLINE(ph)) {
5319 5304                  /* has no last path */
5320 5305                  MDI_PHCI_UNLOCK(ph);
5321 5306                  return;
5322 5307          }
5323 5308  
5324 5309          pip = ph->ph_path_head;
5325 5310          while (pip != NULL) {
5326 5311                  MDI_PI_LOCK(pip);
5327 5312                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5328 5313  
5329 5314                  ct = MDI_PI(pip)->pi_client;
5330 5315                  i_mdi_client_lock(ct, pip);
5331 5316                  MDI_PI_UNLOCK(pip);
5332 5317  
5333 5318                  cdip = ct->ct_dip;
5334 5319                  if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5335 5320                      (i_mdi_client_compute_state(ct, ph) ==
5336 5321                      MDI_CLIENT_STATE_FAILED)) {
5337 5322                          /* Last path. Mark client dip as retiring */
                                   /* Both locks are dropped across the call. */
5338 5323                          i_mdi_client_unlock(ct);
5339 5324                          MDI_PHCI_UNLOCK(ph);
5340 5325                          (void) e_ddi_mark_retiring(cdip, cons_array);
5341 5326                          MDI_PHCI_LOCK(ph);
5342 5327                          pip = next;
5343 5328                  } else {
5344 5329                          i_mdi_client_unlock(ct);
5345 5330                          pip = next;
5346 5331                  }
5347 5332          }
5348 5333  
5349 5334          MDI_PHCI_UNLOCK(ph);
5350 5335  
5351 5336          return;
5352 5337  }
5353 5338  
5354 5339  void
5355 5340  mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5356 5341  {
5357 5342          mdi_phci_t      *ph;
5358 5343          mdi_client_t    *ct;
5359 5344          mdi_pathinfo_t  *pip;
5360 5345          mdi_pathinfo_t  *next;
5361 5346          dev_info_t      *cdip;
5362 5347  
                   /*
                    * For every client that would be left without a usable path
                    * if this pHCI went away, call e_ddi_retire_notify() on the
                    * client's devinfo node with the caller's constraint pointer.
                    * *constraint is set to 0 when the pHCI or one of its clients
                    * is unstable or failing over. It is left untouched when dip
                    * is not a pHCI, has no pHCI structure, or is already offline.
                    */
5363 5348          if (!MDI_PHCI(dip))
5364 5349                  return;
5365 5350  
5366 5351          ph = i_devi_get_phci(dip);
5367 5352          if (ph == NULL)
5368 5353                  return;
5369 5354  
5370 5355          MDI_PHCI_LOCK(ph);
5371 5356  
5372 5357          if (MDI_PHCI_IS_OFFLINE(ph)) {
5373 5358                  MDI_PHCI_UNLOCK(ph);
5374 5359                  /* not last path */
5375 5360                  return;
5376 5361          }
5377 5362  
5378 5363          if (ph->ph_unstable) {
5379 5364                  MDI_PHCI_UNLOCK(ph);
5380 5365                  /* can't check for constraints */
5381 5366                  *constraint = 0;
5382 5367                  return;
5383 5368          }
5384 5369  
5385 5370          pip = ph->ph_path_head;
5386 5371          while (pip != NULL) {
5387 5372                  MDI_PI_LOCK(pip);
5388 5373                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5389 5374  
5390 5375                  /*
5391 5376                   * The mdi_pathinfo state is OK. Check the client state.
5392 5377                   * If a failover is in progress or the client is unstable,
                            * give up and report that constraints can't be checked.
5393 5378                   */
5394 5379                  ct = MDI_PI(pip)->pi_client;
5395 5380                  i_mdi_client_lock(ct, pip);
5396 5381                  if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5397 5382                      (ct->ct_unstable)) {
5398 5383                          /*
5399 5384                           * Failover is in progress, can't check for constraints
5400 5385                           */
5401 5386                          MDI_PI_UNLOCK(pip);
5402 5387                          i_mdi_client_unlock(ct);
5403 5388                          MDI_PHCI_UNLOCK(ph);
5404 5389                          *constraint = 0;
5405 5390                          return;
5406 5391                  }
5407 5392                  MDI_PI_UNLOCK(pip);
5408 5393  
5409 5394                  /*
5410 5395                   * Check to see if we are retiring the last path of this
5411 5396                   * client device...
5412 5397                   */
5413 5398                  cdip = ct->ct_dip;
5414 5399                  if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5415 5400                      (i_mdi_client_compute_state(ct, ph) ==
5416 5401                      MDI_CLIENT_STATE_FAILED)) {
                                   /* Both locks are dropped across the notification. */
5417 5402                          i_mdi_client_unlock(ct);
5418 5403                          MDI_PHCI_UNLOCK(ph);
5419 5404                          (void) e_ddi_retire_notify(cdip, constraint);
5420 5405                          MDI_PHCI_LOCK(ph);
5421 5406                          pip = next;
5422 5407                  } else {
5423 5408                          i_mdi_client_unlock(ct);
5424 5409                          pip = next;
5425 5410                  }
5426 5411          }
5427 5412  
5428 5413          MDI_PHCI_UNLOCK(ph);
5429 5414  
5430 5415          return;
5431 5416  }
5432 5417  
5433 5418  /*
5434 5419   * offline the path(s) hanging off the pHCI. If the
5435 5420   * last path to any client, check that constraints
5436 5421   * have been applied.
5437 5422   *
5438 5423   * If constraint is 0, we aren't going to retire the
5439 5424   * pHCI. However we still need to go through the paths
5440 5425   * calling e_ddi_retire_finalize() to clear their
5441 5426   * contract barriers.
            *
            * dip:        pHCI devinfo node; nothing is done if it is not a pHCI,
            *             has no pHCI structure, or is already offline.
            * phci_only:  when non-zero, clients are not finalized and the
            *             constraint value is not consulted.
            * constraint: read as an int through a cast; only dereferenced when
            *             phci_only is zero.
            *
            * No status is returned. Failure to offline is reported with a
            * CE_WARN message, and a busy path causes the pHCI to be marked
            * online again.
5442 5427   */
5443 5428  void
5444 5429  mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5445 5430  {
5446 5431          mdi_phci_t      *ph;
5447 5432          mdi_client_t    *ct;
5448 5433          mdi_pathinfo_t  *pip;
5449 5434          mdi_pathinfo_t  *next;
5450 5435          dev_info_t      *cdip;
5451 5436          int             unstable = 0;
5452 5437          int             tmp_constraint;
5453 5438  
5454 5439          if (!MDI_PHCI(dip))
5455 5440                  return;
5456 5441  
5457 5442          ph = i_devi_get_phci(dip);
5458 5443          if (ph == NULL) {
5459 5444                  /* no last path and no pips */
5460 5445                  return;
5461 5446          }
5462 5447  
5463 5448          MDI_PHCI_LOCK(ph);
5464 5449  
5465 5450          if (MDI_PHCI_IS_OFFLINE(ph)) {
5466 5451                  MDI_PHCI_UNLOCK(ph);
5467 5452                  /* no last path and no pips */
5468 5453                  return;
5469 5454          }
5470 5455  
5471 5456          /*
5472 5457           * Check to see if the pHCI can be offlined
                    * (instability is only recorded here; it is acted on after
                    * the client walk below)
5473 5458           */
5474 5459          if (ph->ph_unstable) {
5475 5460                  unstable = 1;
5476 5461          }
5477 5462  
5478 5463          pip = ph->ph_path_head;
5479 5464          while (pip != NULL) {
5480 5465                  MDI_PI_LOCK(pip);
5481 5466                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5482 5467  
5483 5468                  /*
5484 5469                   * if failover in progress fail the pHCI from offlining
5485 5470                   */
5486 5471                  ct = MDI_PI(pip)->pi_client;
5487 5472                  i_mdi_client_lock(ct, pip);
5488 5473                  if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5489 5474                      (ct->ct_unstable)) {
5490 5475                          unstable = 1;
5491 5476                  }
5492 5477                  MDI_PI_UNLOCK(pip);
5493 5478  
5494 5479                  /*
5495 5480                   * Check to see if we are removing the last path of this
5496 5481                   * client device...
5497 5482                   */
5498 5483                  cdip = ct->ct_dip;
5499 5484                  if (!phci_only && cdip &&
5500 5485                      (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5501 5486                      (i_mdi_client_compute_state(ct, ph) ==
5502 5487                      MDI_CLIENT_STATE_FAILED)) {
5503 5488                          i_mdi_client_unlock(ct);
5504 5489                          MDI_PHCI_UNLOCK(ph);
5505 5490                          /*
5506 5491                           * This is the last path to this client.
5507 5492                           *
5508 5493                           * Constraint will only be set to 1 if this client can
5509 5494                           * be retired (as already determined by
5510 5495                           * mdi_phci_retire_notify). However we don't actually
5511 5496                           * need to retire the client (we just retire the last
5512 5497                           * path - MPXIO will then fail all I/Os to the client).
5513 5498                           * But we still need to call e_ddi_retire_finalize so
5514 5499                           * the contract barriers can be cleared. Therefore we
5515 5500                           * temporarily set constraint = 0 so that the client
5516 5501                           * dip is not retired.
5517 5502                           */
5518 5503                          tmp_constraint = 0;
5519 5504                          (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5520 5505                          MDI_PHCI_LOCK(ph);
5521 5506                          pip = next;
5522 5507                  } else {
5523 5508                          i_mdi_client_unlock(ct);
5524 5509                          pip = next;
5525 5510                  }
5526 5511          }
5527 5512  
                   /* Constraint of 0: the pHCI is not being retired, stop here. */
5528 5513          if (!phci_only && *((int *)constraint) == 0) {
5529 5514                  MDI_PHCI_UNLOCK(ph);
5530 5515                  return;
5531 5516          }
5532 5517  
5533 5518          /*
5534 5519           * Cannot offline pip(s)
5535 5520           */
5536 5521          if (unstable) {
5537 5522                  cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5538 5523                      "pHCI in transient state, cannot retire",
5539 5524                      ddi_driver_name(dip), ddi_get_instance(dip));
5540 5525                  MDI_PHCI_UNLOCK(ph);
5541 5526                  return;
5542 5527          }
5543 5528  
5544 5529          /*
5545 5530           * Mark the pHCI as offline
5546 5531           */
5547 5532          MDI_PHCI_SET_OFFLINE(ph);
5548 5533  
5549 5534          /*
5550 5535           * Mark the child mdi_pathinfo nodes as transient
5551 5536           */
5552 5537          pip = ph->ph_path_head;
5553 5538          while (pip != NULL) {
5554 5539                  MDI_PI_LOCK(pip);
5555 5540                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5556 5541                  MDI_PI_SET_OFFLINING(pip);
5557 5542                  MDI_PI_UNLOCK(pip);
5558 5543                  pip = next;
5559 5544          }
5560 5545          MDI_PHCI_UNLOCK(ph);
5561 5546          /*
5562 5547           * Give a chance for any pending commands to execute
5563 5548           */
5564 5549          delay_random(mdi_delay);
5565 5550          MDI_PHCI_LOCK(ph);
                   /*
                    * Offline each path (with flags of 0). A path that is still
                    * not offline afterwards aborts the walk: a warning is logged
                    * and the pHCI is marked online again.
                    */
5566 5551          pip = ph->ph_path_head;
5567 5552          while (pip != NULL) {
5568 5553                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5569 5554                  (void) i_mdi_pi_offline(pip, 0);
5570 5555                  MDI_PI_LOCK(pip);
5571 5556                  ct = MDI_PI(pip)->pi_client;
5572 5557                  if (!MDI_PI_IS_OFFLINE(pip)) {
5573 5558                          cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5574 5559                              "path %d %s busy, cannot offline",
5575 5560                              mdi_pi_get_path_instance(pip),
5576 5561                              mdi_pi_spathname(pip));
5577 5562                          MDI_PI_UNLOCK(pip);
5578 5563                          MDI_PHCI_SET_ONLINE(ph);
5579 5564                          MDI_PHCI_UNLOCK(ph);
5580 5565                          return;
5581 5566                  }
5582 5567                  MDI_PI_UNLOCK(pip);
5583 5568                  pip = next;
5584 5569          }
5585 5570          MDI_PHCI_UNLOCK(ph);
5586 5571  
5587 5572          return;
5588 5573  }
5589 5574  
5590 5575  void
5591 5576  mdi_phci_unretire(dev_info_t *dip)
5592 5577  {
5593 5578          mdi_phci_t      *ph;
5594 5579          mdi_pathinfo_t  *pip;
5595 5580          mdi_pathinfo_t  *next;
5596 5581  
                   /*
                    * Undo a retire: mark the pHCI online, then bring every path
                    * on it back online. dip must be a pHCI (asserted).
                    */
5597 5582          ASSERT(MDI_PHCI(dip));
5598 5583  
5599 5584          /*
5600 5585           * Online the phci
5601 5586           */
5602 5587          i_mdi_phci_online(dip);
5603 5588  
5604 5589          ph = i_devi_get_phci(dip);
5605 5590          MDI_PHCI_LOCK(ph);
5606 5591          pip = ph->ph_path_head;
5607 5592          while (pip != NULL) {
                           /* Capture the successor under the path lock. */
5608 5593                  MDI_PI_LOCK(pip);
5609 5594                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5610 5595                  MDI_PI_UNLOCK(pip);
                           /* The result of onlining an individual path is ignored. */
5611 5596                  (void) i_mdi_pi_online(pip, 0);
5612 5597                  pip = next;
5613 5598          }
5614 5599          MDI_PHCI_UNLOCK(ph);
5615 5600  }
5616 5601  
5617 5602  /*ARGSUSED*/
5618 5603  static int
5619 5604  i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5620 5605  {
5621 5606          int             rv = NDI_SUCCESS;
5622 5607          mdi_client_t    *ct;
5623 5608  
5624 5609          /*
5625 5610           * Client component to go offline.  Make sure that we are
5626 5611           * not in failing over state and update client state
5627 5612           * accordingly
5628 5613           */
5629 5614          ct = i_devi_get_client(dip);
5630 5615          MDI_DEBUG(2, (MDI_NOTE, dip,
5631 5616              "called %p %p", (void *)dip, (void *)ct));
5632 5617          if (ct != NULL) {
5633 5618                  MDI_CLIENT_LOCK(ct);
5634 5619                  if (ct->ct_unstable) {
5635 5620                          /*
5636 5621                           * One or more paths are in transient state,
5637 5622                           * Dont allow offline of a client device
5638 5623                           */
5639 5624                          MDI_DEBUG(1, (MDI_WARN, dip,
5640 5625                              "!One or more paths to "
5641 5626                              "this device are in transient state. "
5642 5627                              "This device can not be removed at this moment. "
5643 5628                              "Please try again later."));
5644 5629                          MDI_CLIENT_UNLOCK(ct);
5645 5630                          return (NDI_BUSY);
5646 5631                  }
5647 5632                  if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5648 5633                          /*
5649 5634                           * Failover is in progress, Dont allow DR of
5650 5635                           * a client device
5651 5636                           */
5652 5637                          MDI_DEBUG(1, (MDI_WARN, dip,
5653 5638                              "!Client device is Busy. "
5654 5639                              "This device can not be removed at this moment. "
5655 5640                              "Please try again later."));
5656 5641                          MDI_CLIENT_UNLOCK(ct);
5657 5642                          return (NDI_BUSY);
5658 5643                  }
5659 5644                  MDI_CLIENT_SET_OFFLINE(ct);
5660 5645  
5661 5646                  /*
5662 5647                   * Unbind our relationship with the dev_info node
5663 5648                   */
5664 5649                  if (flags & NDI_DEVI_REMOVE) {
5665 5650                          ct->ct_dip = NULL;
5666 5651                  }
5667 5652                  MDI_CLIENT_UNLOCK(ct);
5668 5653          }
5669 5654          return (rv);
5670 5655  }
5671 5656  
5672 5657  /*
5673 5658   * mdi_pre_attach():
5674 5659   *              Pre attach() notification handler
5675 5660   */
5676 5661  /*ARGSUSED*/
5677 5662  int
5678 5663  mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5679 5664  {
5680 5665          /* don't support old DDI_PM_RESUME */
5681 5666          if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5682 5667              (cmd == DDI_PM_RESUME))
5683 5668                  return (DDI_FAILURE);
5684 5669  
5685 5670          return (DDI_SUCCESS);
5686 5671  }
5687 5672  
5688 5673  /*
5689 5674   * mdi_post_attach():
5690 5675   *              Post attach() notification handler
5691 5676   */
5692 5677  /*ARGSUSED*/
5693 5678  void
5694 5679  mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5695 5680  {
5696 5681          mdi_phci_t      *ph;
5697 5682          mdi_client_t    *ct;
5698 5683          mdi_vhci_t      *vh;
5699 5684  
5700 5685          if (MDI_PHCI(dip)) {
5701 5686                  ph = i_devi_get_phci(dip);
5702 5687                  ASSERT(ph != NULL);
5703 5688  
5704 5689                  MDI_PHCI_LOCK(ph);
5705 5690                  switch (cmd) {
5706 5691                  case DDI_ATTACH:
5707 5692                          MDI_DEBUG(2, (MDI_NOTE, dip,
5708 5693                              "phci post_attach called %p", (void *)ph));
5709 5694                          if (error == DDI_SUCCESS) {
5710 5695                                  MDI_PHCI_SET_ATTACH(ph);
5711 5696                          } else {
5712 5697                                  MDI_DEBUG(1, (MDI_NOTE, dip,
5713 5698                                      "!pHCI post_attach failed: error %d",
5714 5699                                      error));
5715 5700                                  MDI_PHCI_SET_DETACH(ph);
5716 5701                          }
5717 5702                          break;
5718 5703  
5719 5704                  case DDI_RESUME:
5720 5705                          MDI_DEBUG(2, (MDI_NOTE, dip,
5721 5706                              "pHCI post_resume: called %p", (void *)ph));
5722 5707                          if (error == DDI_SUCCESS) {
5723 5708                                  MDI_PHCI_SET_RESUME(ph);
5724 5709                          } else {
5725 5710                                  MDI_DEBUG(1, (MDI_NOTE, dip,
5726 5711                                      "!pHCI post_resume failed: error %d",
5727 5712                                      error));
5728 5713                                  MDI_PHCI_SET_SUSPEND(ph);
5729 5714                          }
5730 5715                          break;
5731 5716                  }
5732 5717                  MDI_PHCI_UNLOCK(ph);
5733 5718          }
5734 5719  
5735 5720          if (MDI_CLIENT(dip)) {
5736 5721                  ct = i_devi_get_client(dip);
5737 5722                  ASSERT(ct != NULL);
5738 5723  
5739 5724                  MDI_CLIENT_LOCK(ct);
5740 5725                  switch (cmd) {
5741 5726                  case DDI_ATTACH:
5742 5727                          MDI_DEBUG(2, (MDI_NOTE, dip,
5743 5728                              "client post_attach called %p", (void *)ct));
5744 5729                          if (error != DDI_SUCCESS) {
5745 5730                                  MDI_DEBUG(1, (MDI_NOTE, dip,
5746 5731                                      "!client post_attach failed: error %d",
5747 5732                                      error));
5748 5733                                  MDI_CLIENT_SET_DETACH(ct);
5749 5734                                  MDI_DEBUG(4, (MDI_WARN, dip,
5750 5735                                      "i_mdi_pm_reset_client"));
5751 5736                                  i_mdi_pm_reset_client(ct);
5752 5737                                  break;
5753 5738                          }
5754 5739  
5755 5740                          /*
5756 5741                           * Client device has successfully attached, inform
5757 5742                           * the vhci.
5758 5743                           */
5759 5744                          vh = ct->ct_vhci;
5760 5745                          if (vh->vh_ops->vo_client_attached)
5761 5746                                  (*vh->vh_ops->vo_client_attached)(dip);
5762 5747  
5763 5748                          MDI_CLIENT_SET_ATTACH(ct);
5764 5749                          break;
5765 5750  
5766 5751                  case DDI_RESUME:
5767 5752                          MDI_DEBUG(2, (MDI_NOTE, dip,
5768 5753                              "client post_attach: called %p", (void *)ct));
5769 5754                          if (error == DDI_SUCCESS) {
5770 5755                                  MDI_CLIENT_SET_RESUME(ct);
5771 5756                          } else {
5772 5757                                  MDI_DEBUG(1, (MDI_NOTE, dip,
5773 5758                                      "!client post_resume failed: error %d",
5774 5759                                      error));
5775 5760                                  MDI_CLIENT_SET_SUSPEND(ct);
5776 5761                          }
5777 5762                          break;
5778 5763                  }
5779 5764                  MDI_CLIENT_UNLOCK(ct);
5780 5765          }
5781 5766  }
5782 5767  
5783 5768  /*
5784 5769   * mdi_pre_detach():
5785 5770   *              Pre detach notification handler
5786 5771   */
5787 5772  /*ARGSUSED*/
5788 5773  int
5789 5774  mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5790 5775  {
5791 5776          int rv = DDI_SUCCESS;
5792 5777  
5793 5778          if (MDI_CLIENT(dip)) {
5794 5779                  (void) i_mdi_client_pre_detach(dip, cmd);
5795 5780          }
5796 5781  
5797 5782          if (MDI_PHCI(dip)) {
5798 5783                  rv = i_mdi_phci_pre_detach(dip, cmd);
5799 5784          }
5800 5785  
5801 5786          return (rv);
5802 5787  }
5803 5788  
5804 5789  /*ARGSUSED*/
5805 5790  static int
5806 5791  i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5807 5792  {
5808 5793          int             rv = DDI_SUCCESS;
5809 5794          mdi_phci_t      *ph;
5810 5795          mdi_client_t    *ct;
5811 5796          mdi_pathinfo_t  *pip;
5812 5797          mdi_pathinfo_t  *failed_pip = NULL;
5813 5798          mdi_pathinfo_t  *next;
5814 5799  
5815 5800          ph = i_devi_get_phci(dip);
5816 5801          if (ph == NULL) {
5817 5802                  return (rv);
5818 5803          }
5819 5804  
5820 5805          MDI_PHCI_LOCK(ph);
5821 5806          switch (cmd) {
5822 5807          case DDI_DETACH:
5823 5808                  MDI_DEBUG(2, (MDI_NOTE, dip,
5824 5809                      "pHCI pre_detach: called %p", (void *)ph));
5825 5810                  if (!MDI_PHCI_IS_OFFLINE(ph)) {
5826 5811                          /*
5827 5812                           * mdi_pathinfo nodes are still attached to
5828 5813                           * this pHCI. Fail the detach for this pHCI.
5829 5814                           */
5830 5815                          MDI_DEBUG(2, (MDI_WARN, dip,
5831 5816                              "pHCI pre_detach: paths are still attached %p",
5832 5817                              (void *)ph));
5833 5818                          rv = DDI_FAILURE;
5834 5819                          break;
5835 5820                  }
5836 5821                  MDI_PHCI_SET_DETACH(ph);
5837 5822                  break;
5838 5823  
5839 5824          case DDI_SUSPEND:
5840 5825                  /*
5841 5826                   * pHCI is getting suspended.  Since mpxio client
5842 5827                   * devices may not be suspended at this point, to avoid
5843 5828                   * a potential stack overflow, it is important to suspend
5844 5829                   * client devices before pHCI can be suspended.
5845 5830                   */
5846 5831  
5847 5832                  MDI_DEBUG(2, (MDI_NOTE, dip,
5848 5833                      "pHCI pre_suspend: called %p", (void *)ph));
5849 5834                  /*
5850 5835                   * Suspend all the client devices accessible through this pHCI
5851 5836                   */
5852 5837                  pip = ph->ph_path_head;
5853 5838                  while (pip != NULL && rv == DDI_SUCCESS) {
5854 5839                          dev_info_t *cdip;
5855 5840                          MDI_PI_LOCK(pip);
5856 5841                          next =
5857 5842                              (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5858 5843                          ct = MDI_PI(pip)->pi_client;
5859 5844                          i_mdi_client_lock(ct, pip);
5860 5845                          cdip = ct->ct_dip;
5861 5846                          MDI_PI_UNLOCK(pip);
5862 5847                          if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5863 5848                              MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5864 5849                                  i_mdi_client_unlock(ct);
5865 5850                                  if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5866 5851                                      DDI_SUCCESS) {
5867 5852                                          /*
5868 5853                                           * Suspend of one of the client
5869 5854                                           * device has failed.
5870 5855                                           */
5871 5856                                          MDI_DEBUG(1, (MDI_WARN, dip,
5872 5857                                              "!suspend of device (%s%d) failed.",
5873 5858                                              ddi_driver_name(cdip),
5874 5859                                              ddi_get_instance(cdip)));
5875 5860                                          failed_pip = pip;
5876 5861                                          break;
5877 5862                                  }
5878 5863                          } else {
5879 5864                                  i_mdi_client_unlock(ct);
5880 5865                          }
5881 5866                          pip = next;
5882 5867                  }
5883 5868  
5884 5869                  if (rv == DDI_SUCCESS) {
5885 5870                          /*
5886 5871                           * Suspend of client devices is complete. Proceed
5887 5872                           * with pHCI suspend.
5888 5873                           */
5889 5874                          MDI_PHCI_SET_SUSPEND(ph);
5890 5875                  } else {
5891 5876                          /*
5892 5877                           * Revert back all the suspended client device states
5893 5878                           * to converse.
5894 5879                           */
5895 5880                          pip = ph->ph_path_head;
5896 5881                          while (pip != failed_pip) {
5897 5882                                  dev_info_t *cdip;
5898 5883                                  MDI_PI_LOCK(pip);
5899 5884                                  next =
5900 5885                                      (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5901 5886                                  ct = MDI_PI(pip)->pi_client;
5902 5887                                  i_mdi_client_lock(ct, pip);
5903 5888                                  cdip = ct->ct_dip;
5904 5889                                  MDI_PI_UNLOCK(pip);
5905 5890                                  if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5906 5891                                          i_mdi_client_unlock(ct);
5907 5892                                          (void) devi_attach(cdip, DDI_RESUME);
5908 5893                                  } else {
5909 5894                                          i_mdi_client_unlock(ct);
5910 5895                                  }
5911 5896                                  pip = next;
5912 5897                          }
5913 5898                  }
5914 5899                  break;
5915 5900  
5916 5901          default:
5917 5902                  rv = DDI_FAILURE;
5918 5903                  break;
5919 5904          }
5920 5905          MDI_PHCI_UNLOCK(ph);
5921 5906          return (rv);
5922 5907  }
5923 5908  
5924 5909  /*ARGSUSED*/
5925 5910  static int
5926 5911  i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5927 5912  {
5928 5913          int             rv = DDI_SUCCESS;
5929 5914          mdi_client_t    *ct;
5930 5915  
5931 5916          ct = i_devi_get_client(dip);
5932 5917          if (ct == NULL) {
5933 5918                  return (rv);
5934 5919          }
5935 5920  
5936 5921          MDI_CLIENT_LOCK(ct);
5937 5922          switch (cmd) {
5938 5923          case DDI_DETACH:
5939 5924                  MDI_DEBUG(2, (MDI_NOTE, dip,
5940 5925                      "client pre_detach: called %p",
5941 5926                       (void *)ct));
5942 5927                  MDI_CLIENT_SET_DETACH(ct);
5943 5928                  break;
5944 5929  
5945 5930          case DDI_SUSPEND:
5946 5931                  MDI_DEBUG(2, (MDI_NOTE, dip,
5947 5932                      "client pre_suspend: called %p",
5948 5933                      (void *)ct));
5949 5934                  MDI_CLIENT_SET_SUSPEND(ct);
5950 5935                  break;
5951 5936  
5952 5937          default:
5953 5938                  rv = DDI_FAILURE;
5954 5939                  break;
5955 5940          }
5956 5941          MDI_CLIENT_UNLOCK(ct);
5957 5942          return (rv);
5958 5943  }
5959 5944  
5960 5945  /*
5961 5946   * mdi_post_detach():
5962 5947   *              Post detach notification handler
5963 5948   */
5964 5949  /*ARGSUSED*/
5965 5950  void
5966 5951  mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5967 5952  {
5968 5953          /*
5969 5954           * Detach/Suspend of mpxio component failed. Update our state
5970 5955           * too
5971 5956           */
5972 5957          if (MDI_PHCI(dip))
5973 5958                  i_mdi_phci_post_detach(dip, cmd, error);
5974 5959  
5975 5960          if (MDI_CLIENT(dip))
5976 5961                  i_mdi_client_post_detach(dip, cmd, error);
5977 5962  }
5978 5963  
5979 5964  /*ARGSUSED*/
5980 5965  static void
5981 5966  i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5982 5967  {
5983 5968          mdi_phci_t      *ph;
5984 5969  
5985 5970          /*
5986 5971           * Detach/Suspend of phci component failed. Update our state
5987 5972           * too
5988 5973           */
5989 5974          ph = i_devi_get_phci(dip);
5990 5975          if (ph == NULL) {
5991 5976                  return;
5992 5977          }
5993 5978  
5994 5979          MDI_PHCI_LOCK(ph);
5995 5980          /*
5996 5981           * Detach of pHCI failed. Restore back converse
5997 5982           * state
5998 5983           */
5999 5984          switch (cmd) {
6000 5985          case DDI_DETACH:
6001 5986                  MDI_DEBUG(2, (MDI_NOTE, dip,
6002 5987                      "pHCI post_detach: called %p",
6003 5988                      (void *)ph));
6004 5989                  if (error != DDI_SUCCESS)
6005 5990                          MDI_PHCI_SET_ATTACH(ph);
6006 5991                  break;
6007 5992  
6008 5993          case DDI_SUSPEND:
6009 5994                  MDI_DEBUG(2, (MDI_NOTE, dip,
6010 5995                      "pHCI post_suspend: called %p",
6011 5996                      (void *)ph));
6012 5997                  if (error != DDI_SUCCESS)
6013 5998                          MDI_PHCI_SET_RESUME(ph);
6014 5999                  break;
6015 6000          }
6016 6001          MDI_PHCI_UNLOCK(ph);
6017 6002  }
6018 6003  
6019 6004  /*ARGSUSED*/
6020 6005  static void
6021 6006  i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6022 6007  {
6023 6008          mdi_client_t    *ct;
6024 6009  
6025 6010          ct = i_devi_get_client(dip);
6026 6011          if (ct == NULL) {
6027 6012                  return;
6028 6013          }
6029 6014          MDI_CLIENT_LOCK(ct);
6030 6015          /*
6031 6016           * Detach of Client failed. Restore back converse
6032 6017           * state
6033 6018           */
6034 6019          switch (cmd) {
6035 6020          case DDI_DETACH:
6036 6021                  MDI_DEBUG(2, (MDI_NOTE, dip,
6037 6022                      "client post_detach: called %p", (void *)ct));
6038 6023                  if (DEVI_IS_ATTACHING(dip)) {
6039 6024                          MDI_DEBUG(4, (MDI_NOTE, dip,
6040 6025                              "i_mdi_pm_rele_client\n"));
6041 6026                          i_mdi_pm_rele_client(ct, ct->ct_path_count);
6042 6027                  } else {
6043 6028                          MDI_DEBUG(4, (MDI_NOTE, dip,
6044 6029                              "i_mdi_pm_reset_client\n"));
6045 6030                          i_mdi_pm_reset_client(ct);
6046 6031                  }
6047 6032                  if (error != DDI_SUCCESS)
6048 6033                          MDI_CLIENT_SET_ATTACH(ct);
6049 6034                  break;
6050 6035  
6051 6036          case DDI_SUSPEND:
6052 6037                  MDI_DEBUG(2, (MDI_NOTE, dip,
6053 6038                      "called %p", (void *)ct));
6054 6039                  if (error != DDI_SUCCESS)
6055 6040                          MDI_CLIENT_SET_RESUME(ct);
6056 6041                  break;
6057 6042          }
6058 6043          MDI_CLIENT_UNLOCK(ct);
6059 6044  }
6060 6045  
6061 6046  int
6062 6047  mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6063 6048  {
6064 6049          return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6065 6050  }
6066 6051  
6067 6052  /*
  
    | 
      ↓ open down ↓ | 
    1165 lines elided | 
    
      ↑ open up ↑ | 
  
6068 6053   * create and install per-path (client - pHCI) statistics
6069 6054   * I/O stats supported: nread, nwritten, reads, and writes
6070 6055   * Error stats - hard errors, soft errors, & transport errors
6071 6056   */
6072 6057  int
6073 6058  mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6074 6059  {
6075 6060          kstat_t                 *kiosp, *kerrsp;
6076 6061          struct pi_errs          *nsp;
6077 6062          struct mdi_pi_kstats    *mdi_statp;
     6063 +        char                    *errksname;
     6064 +        size_t                  len;
6078 6065  
6079      -        if (MDI_PI(pip)->pi_kstats != NULL)
     6066 +        /*
     6067 +         * If the kstat name was already created nothing to do.
     6068 +         */
     6069 +        if ((kiosp = kstat_hold_byname("mdi", 0, ksname,
     6070 +            ALL_ZONES)) != NULL) {
     6071 +                kstat_rele(kiosp);
6080 6072                  return (MDI_SUCCESS);
     6073 +        }
6081 6074  
6082 6075          if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6083 6076              KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6084 6077                  return (MDI_FAILURE);
6085 6078          }
6086 6079  
6087      -        (void) strcat(ksname, ",err");
6088      -        kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
     6080 +        len = strlen(ksname) + strlen(",err") + 1;
     6081 +        errksname = kmem_alloc(len, KM_SLEEP);
     6082 +        (void) snprintf(errksname, len, "%s,err", ksname);
     6083 +
     6084 +        kerrsp = kstat_create("mdi", 0, errksname, "iopath_errors",
6089 6085              KSTAT_TYPE_NAMED,
6090 6086              sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6091 6087          if (kerrsp == NULL) {
6092 6088                  kstat_delete(kiosp);
     6089 +                kmem_free(errksname, len);
6093 6090                  return (MDI_FAILURE);
6094 6091          }
6095 6092  
6096 6093          nsp = (struct pi_errs *)kerrsp->ks_data;
6097 6094          kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6098 6095          kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6099 6096          kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6100 6097              KSTAT_DATA_UINT32);
6101 6098          kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6102 6099              KSTAT_DATA_UINT32);
6103 6100          kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6104 6101              KSTAT_DATA_UINT32);
6105 6102          kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6106 6103              KSTAT_DATA_UINT32);
6107 6104          kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6108 6105              KSTAT_DATA_UINT32);
6109 6106          kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6110 6107              KSTAT_DATA_UINT32);
6111 6108          kstat_named_init(&nsp->pi_failedfrom, "Failed From",
  
    | 
      ↓ open down ↓ | 
    9 lines elided | 
    
      ↑ open up ↑ | 
  
6112 6109              KSTAT_DATA_UINT32);
6113 6110          kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6114 6111  
6115 6112          mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6116 6113          mdi_statp->pi_kstat_ref = 1;
6117 6114          mdi_statp->pi_kstat_iostats = kiosp;
6118 6115          mdi_statp->pi_kstat_errstats = kerrsp;
6119 6116          kstat_install(kiosp);
6120 6117          kstat_install(kerrsp);
6121 6118          MDI_PI(pip)->pi_kstats = mdi_statp;
     6119 +        kmem_free(errksname, len);
6122 6120          return (MDI_SUCCESS);
6123 6121  }
6124 6122  
6125 6123  /*
6126 6124   * destroy per-path properties
6127 6125   */
6128 6126  static void
6129 6127  i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6130 6128  {
6131 6129  
6132 6130          struct mdi_pi_kstats *mdi_statp;
6133 6131  
6134 6132          if (MDI_PI(pip)->pi_kstats == NULL)
6135 6133                  return;
6136 6134          if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6137 6135                  return;
6138 6136  
6139 6137          MDI_PI(pip)->pi_kstats = NULL;
6140 6138  
6141 6139          /*
6142 6140           * the kstat may be shared between multiple pathinfo nodes
6143 6141           * decrement this pathinfo's usage, removing the kstats
6144 6142           * themselves when the last pathinfo reference is removed.
6145 6143           */
6146 6144          ASSERT(mdi_statp->pi_kstat_ref > 0);
6147 6145          if (--mdi_statp->pi_kstat_ref != 0)
6148 6146                  return;
6149 6147  
6150 6148          kstat_delete(mdi_statp->pi_kstat_iostats);
6151 6149          kstat_delete(mdi_statp->pi_kstat_errstats);
6152 6150          kmem_free(mdi_statp, sizeof (*mdi_statp));
6153 6151  }
6154 6152  
6155 6153  /*
6156 6154   * update I/O paths KSTATS
6157 6155   */
6158 6156  void
6159 6157  mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6160 6158  {
6161 6159          kstat_t *iostatp;
6162 6160          size_t xfer_cnt;
6163 6161  
6164 6162          ASSERT(pip != NULL);
6165 6163  
6166 6164          /*
6167 6165           * I/O can be driven across a path prior to having path
6168 6166           * statistics available, i.e. probe(9e).
6169 6167           */
6170 6168          if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6171 6169                  iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6172 6170                  xfer_cnt = bp->b_bcount - bp->b_resid;
6173 6171                  if (bp->b_flags & B_READ) {
6174 6172                          KSTAT_IO_PTR(iostatp)->reads++;
6175 6173                          KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6176 6174                  } else {
6177 6175                          KSTAT_IO_PTR(iostatp)->writes++;
6178 6176                          KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6179 6177                  }
6180 6178          }
6181 6179  }
6182 6180  
6183 6181  /*
6184 6182   * Enable the path(specific client/target/initiator)
6185 6183   * Enabling a path means that MPxIO may select the enabled path for routing
6186 6184   * future I/O requests, subject to other path state constraints.
6187 6185   */
6188 6186  int
6189 6187  mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6190 6188  {
6191 6189          mdi_phci_t      *ph;
6192 6190  
6193 6191          ph = MDI_PI(pip)->pi_phci;
6194 6192          if (ph == NULL) {
6195 6193                  MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6196 6194                      "!failed: path %s %p: NULL ph",
6197 6195                      mdi_pi_spathname(pip), (void *)pip));
6198 6196                  return (MDI_FAILURE);
6199 6197          }
6200 6198  
6201 6199          (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6202 6200                  MDI_ENABLE_OP);
6203 6201          MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6204 6202              "!returning success pip = %p. ph = %p",
6205 6203              (void *)pip, (void *)ph));
6206 6204          return (MDI_SUCCESS);
6207 6205  
6208 6206  }
6209 6207  
6210 6208  /*
6211 6209   * Disable the path (specific client/target/initiator)
6212 6210   * Disabling a path means that MPxIO will not select the disabled path for
6213 6211   * routing any new I/O requests.
6214 6212   */
6215 6213  int
6216 6214  mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6217 6215  {
6218 6216          mdi_phci_t      *ph;
6219 6217  
6220 6218          ph = MDI_PI(pip)->pi_phci;
6221 6219          if (ph == NULL) {
6222 6220                  MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6223 6221                      "!failed: path %s %p: NULL ph",
6224 6222                      mdi_pi_spathname(pip), (void *)pip));
6225 6223                  return (MDI_FAILURE);
6226 6224          }
6227 6225  
6228 6226          (void) i_mdi_enable_disable_path(pip,
6229 6227              ph->ph_vhci, flags, MDI_DISABLE_OP);
6230 6228          MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6231 6229              "!returning success pip = %p. ph = %p",
6232 6230              (void *)pip, (void *)ph));
6233 6231          return (MDI_SUCCESS);
6234 6232  }
6235 6233  
6236 6234  /*
6237 6235   * disable the path to a particular pHCI (pHCI specified in the phci_path
6238 6236   * argument) for a particular client (specified in the client_path argument).
6239 6237   * Disabling a path means that MPxIO will not select the disabled path for
6240 6238   * routing any new I/O requests.
6241 6239   * NOTE: this will be removed once the NWS files are changed to use the new
6242 6240   * mdi_{enable,disable}_path interfaces
6243 6241   */
6244 6242  int
6245 6243  mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6246 6244  {
6247 6245          return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6248 6246  }
6249 6247  
6250 6248  /*
6251 6249   * Enable the path to a particular pHCI (pHCI specified in the phci_path
6252 6250   * argument) for a particular client (specified in the client_path argument).
6253 6251   * Enabling a path means that MPxIO may select the enabled path for routing
6254 6252   * future I/O requests, subject to other path state constraints.
6255 6253   * NOTE: this will be removed once the NWS files are changed to use the new
6256 6254   * mdi_{enable,disable}_path interfaces
6257 6255   */
6258 6256  
6259 6257  int
6260 6258  mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6261 6259  {
6262 6260          return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6263 6261  }
6264 6262  
6265 6263  /*
6266 6264   * Common routine for doing enable/disable.
6267 6265   */
6268 6266  static mdi_pathinfo_t *
6269 6267  i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6270 6268                  int op)
6271 6269  {
6272 6270          int             sync_flag = 0;
6273 6271          int             rv;
6274 6272          mdi_pathinfo_t  *next;
6275 6273          int             (*f)() = NULL;
6276 6274  
6277 6275          /*
6278 6276           * Check to make sure the path is not already in the
6279 6277           * requested state. If it is just return the next path
6280 6278           * as we have nothing to do here.
6281 6279           */
6282 6280          if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6283 6281              (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6284 6282                  MDI_PI_LOCK(pip);
6285 6283                  next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6286 6284                  MDI_PI_UNLOCK(pip);
6287 6285                  return (next);
6288 6286          }
6289 6287  
6290 6288          f = vh->vh_ops->vo_pi_state_change;
6291 6289  
6292 6290          sync_flag = (flags << 8) & 0xf00;
6293 6291  
6294 6292          /*
6295 6293           * Do a callback into the mdi consumer to let it
6296 6294           * know that path is about to get enabled/disabled.
6297 6295           */
6298 6296          if (f != NULL) {
6299 6297                  rv = (*f)(vh->vh_dip, pip, 0,
6300 6298                          MDI_PI_EXT_STATE(pip),
6301 6299                          MDI_EXT_STATE_CHANGE | sync_flag |
6302 6300                          op | MDI_BEFORE_STATE_CHANGE);
6303 6301                  if (rv != MDI_SUCCESS) {
6304 6302                          MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6305 6303                              "vo_pi_state_change: failed rv = %x", rv));
6306 6304                  }
6307 6305          }
6308 6306          MDI_PI_LOCK(pip);
6309 6307          next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6310 6308  
6311 6309          switch (flags) {
6312 6310                  case USER_DISABLE:
6313 6311                          if (op == MDI_DISABLE_OP) {
6314 6312                                  MDI_PI_SET_USER_DISABLE(pip);
6315 6313                          } else {
6316 6314                                  MDI_PI_SET_USER_ENABLE(pip);
6317 6315                          }
6318 6316                          break;
6319 6317                  case DRIVER_DISABLE:
6320 6318                          if (op == MDI_DISABLE_OP) {
6321 6319                                  MDI_PI_SET_DRV_DISABLE(pip);
6322 6320                          } else {
6323 6321                                  MDI_PI_SET_DRV_ENABLE(pip);
6324 6322                          }
6325 6323                          break;
6326 6324                  case DRIVER_DISABLE_TRANSIENT:
6327 6325                          if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6328 6326                                  MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6329 6327                          } else {
6330 6328                                  MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6331 6329                          }
6332 6330                          break;
6333 6331          }
6334 6332          MDI_PI_UNLOCK(pip);
6335 6333          /*
6336 6334           * Do a callback into the mdi consumer to let it
6337 6335           * know that path is now enabled/disabled.
6338 6336           */
6339 6337          if (f != NULL) {
6340 6338                  rv = (*f)(vh->vh_dip, pip, 0,
6341 6339                          MDI_PI_EXT_STATE(pip),
6342 6340                          MDI_EXT_STATE_CHANGE | sync_flag |
6343 6341                          op | MDI_AFTER_STATE_CHANGE);
6344 6342                  if (rv != MDI_SUCCESS) {
6345 6343                          MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6346 6344                              "vo_pi_state_change failed: rv = %x", rv));
6347 6345                  }
6348 6346          }
6349 6347          return (next);
6350 6348  }
6351 6349  
6352 6350  /*
6353 6351   * Common routine for doing enable/disable.
6354 6352   * NOTE: this will be removed once the NWS files are changed to use the new
6355 6353   * mdi_{enable,disable}_path has been putback
6356 6354   */
6357 6355  int
6358 6356  i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6359 6357  {
6360 6358  
6361 6359          mdi_phci_t      *ph;
6362 6360          mdi_vhci_t      *vh = NULL;
6363 6361          mdi_client_t    *ct;
6364 6362          mdi_pathinfo_t  *next, *pip;
6365 6363          int             found_it;
6366 6364  
6367 6365          ph = i_devi_get_phci(pdip);
6368 6366          MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6369 6367              "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6370 6368              (void *)cdip));
6371 6369          if (ph == NULL) {
6372 6370                  MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6373 6371                      "!failed: operation %d: NULL ph", op));
6374 6372                  return (MDI_FAILURE);
6375 6373          }
6376 6374  
6377 6375          if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6378 6376                  MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6379 6377                      "!failed: invalid operation %d", op));
6380 6378                  return (MDI_FAILURE);
6381 6379          }
6382 6380  
6383 6381          vh = ph->ph_vhci;
6384 6382  
6385 6383          if (cdip == NULL) {
6386 6384                  /*
6387 6385                   * Need to mark the Phci as enabled/disabled.
6388 6386                   */
6389 6387                  MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6390 6388                      "op %d for the phci", op));
6391 6389                  MDI_PHCI_LOCK(ph);
6392 6390                  switch (flags) {
6393 6391                          case USER_DISABLE:
6394 6392                                  if (op == MDI_DISABLE_OP) {
6395 6393                                          MDI_PHCI_SET_USER_DISABLE(ph);
6396 6394                                  } else {
6397 6395                                          MDI_PHCI_SET_USER_ENABLE(ph);
6398 6396                                  }
6399 6397                                  break;
6400 6398                          case DRIVER_DISABLE:
6401 6399                                  if (op == MDI_DISABLE_OP) {
6402 6400                                          MDI_PHCI_SET_DRV_DISABLE(ph);
6403 6401                                  } else {
6404 6402                                          MDI_PHCI_SET_DRV_ENABLE(ph);
6405 6403                                  }
6406 6404                                  break;
6407 6405                          case DRIVER_DISABLE_TRANSIENT:
6408 6406                                  if (op == MDI_DISABLE_OP) {
6409 6407                                          MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6410 6408                                  } else {
6411 6409                                          MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6412 6410                                  }
6413 6411                                  break;
6414 6412                          default:
6415 6413                                  MDI_PHCI_UNLOCK(ph);
6416 6414                                  MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6417 6415                                      "!invalid flag argument= %d", flags));
6418 6416                  }
6419 6417  
6420 6418                  /*
6421 6419                   * Phci has been disabled. Now try to enable/disable
6422 6420                   * path info's to each client.
6423 6421                   */
6424 6422                  pip = ph->ph_path_head;
6425 6423                  while (pip != NULL) {
6426 6424                          pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6427 6425                  }
6428 6426                  MDI_PHCI_UNLOCK(ph);
6429 6427          } else {
6430 6428  
6431 6429                  /*
6432 6430                   * Disable a specific client.
6433 6431                   */
6434 6432                  ct = i_devi_get_client(cdip);
6435 6433                  if (ct == NULL) {
6436 6434                          MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6437 6435                              "!failed: operation = %d: NULL ct", op));
6438 6436                          return (MDI_FAILURE);
6439 6437                  }
6440 6438  
6441 6439                  MDI_CLIENT_LOCK(ct);
6442 6440                  pip = ct->ct_path_head;
6443 6441                  found_it = 0;
6444 6442                  while (pip != NULL) {
6445 6443                          MDI_PI_LOCK(pip);
6446 6444                          next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6447 6445                          if (MDI_PI(pip)->pi_phci == ph) {
6448 6446                                  MDI_PI_UNLOCK(pip);
6449 6447                                  found_it = 1;
6450 6448                                  break;
6451 6449                          }
6452 6450                          MDI_PI_UNLOCK(pip);
6453 6451                          pip = next;
6454 6452                  }
6455 6453  
6456 6454  
6457 6455                  MDI_CLIENT_UNLOCK(ct);
6458 6456                  if (found_it == 0) {
6459 6457                          MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6460 6458                              "!failed. Could not find corresponding pip\n"));
6461 6459                          return (MDI_FAILURE);
6462 6460                  }
6463 6461  
6464 6462                  (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6465 6463          }
6466 6464  
6467 6465          MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6468 6466              "!op %d returning success pdip = %p cdip = %p",
6469 6467              op, (void *)pdip, (void *)cdip));
6470 6468          return (MDI_SUCCESS);
6471 6469  }
6472 6470  
6473 6471  /*
6474 6472   * Ensure phci powered up
6475 6473   */
6476 6474  static void
6477 6475  i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6478 6476  {
6479 6477          dev_info_t      *ph_dip;
6480 6478  
6481 6479          ASSERT(pip != NULL);
6482 6480          ASSERT(MDI_PI_LOCKED(pip));
6483 6481  
6484 6482          if (MDI_PI(pip)->pi_pm_held) {
6485 6483                  return;
6486 6484          }
6487 6485  
6488 6486          ph_dip = mdi_pi_get_phci(pip);
6489 6487          MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6490 6488              "%s %p", mdi_pi_spathname(pip), (void *)pip));
6491 6489          if (ph_dip == NULL) {
6492 6490                  return;
6493 6491          }
6494 6492  
6495 6493          MDI_PI_UNLOCK(pip);
6496 6494          MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6497 6495              DEVI(ph_dip)->devi_pm_kidsupcnt));
6498 6496          pm_hold_power(ph_dip);
6499 6497          MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6500 6498              DEVI(ph_dip)->devi_pm_kidsupcnt));
6501 6499          MDI_PI_LOCK(pip);
6502 6500  
6503 6501          /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6504 6502          if (DEVI(ph_dip)->devi_pm_info)
6505 6503                  MDI_PI(pip)->pi_pm_held = 1;
6506 6504  }
6507 6505  
6508 6506  /*
6509 6507   * Allow phci powered down
6510 6508   */
6511 6509  static void
6512 6510  i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6513 6511  {
6514 6512          dev_info_t      *ph_dip = NULL;
6515 6513  
6516 6514          ASSERT(pip != NULL);
6517 6515          ASSERT(MDI_PI_LOCKED(pip));
6518 6516  
6519 6517          if (MDI_PI(pip)->pi_pm_held == 0) {
6520 6518                  return;
6521 6519          }
6522 6520  
6523 6521          ph_dip = mdi_pi_get_phci(pip);
6524 6522          ASSERT(ph_dip != NULL);
6525 6523  
6526 6524          MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6527 6525              "%s %p", mdi_pi_spathname(pip), (void *)pip));
6528 6526  
6529 6527          MDI_PI_UNLOCK(pip);
6530 6528          MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6531 6529              "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6532 6530          pm_rele_power(ph_dip);
6533 6531          MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6534 6532              "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6535 6533          MDI_PI_LOCK(pip);
6536 6534  
6537 6535          MDI_PI(pip)->pi_pm_held = 0;
6538 6536  }
6539 6537  
6540 6538  static void
6541 6539  i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6542 6540  {
6543 6541          ASSERT(MDI_CLIENT_LOCKED(ct));
6544 6542  
6545 6543          ct->ct_power_cnt += incr;
6546 6544          MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6547 6545              "%p ct_power_cnt = %d incr = %d",
6548 6546              (void *)ct, ct->ct_power_cnt, incr));
6549 6547          ASSERT(ct->ct_power_cnt >= 0);
6550 6548  }
6551 6549  
6552 6550  static void
6553 6551  i_mdi_rele_all_phci(mdi_client_t *ct)
6554 6552  {
6555 6553          mdi_pathinfo_t  *pip;
6556 6554  
6557 6555          ASSERT(MDI_CLIENT_LOCKED(ct));
6558 6556          pip = (mdi_pathinfo_t *)ct->ct_path_head;
6559 6557          while (pip != NULL) {
6560 6558                  mdi_hold_path(pip);
6561 6559                  MDI_PI_LOCK(pip);
6562 6560                  i_mdi_pm_rele_pip(pip);
6563 6561                  MDI_PI_UNLOCK(pip);
6564 6562                  mdi_rele_path(pip);
6565 6563                  pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6566 6564          }
6567 6565  }
6568 6566  
6569 6567  static void
6570 6568  i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6571 6569  {
6572 6570          ASSERT(MDI_CLIENT_LOCKED(ct));
6573 6571  
6574 6572          if (i_ddi_devi_attached(ct->ct_dip)) {
6575 6573                  ct->ct_power_cnt -= decr;
6576 6574                  MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6577 6575                      "%p ct_power_cnt = %d decr = %d",
6578 6576                      (void *)ct, ct->ct_power_cnt, decr));
6579 6577          }
6580 6578  
6581 6579          ASSERT(ct->ct_power_cnt >= 0);
6582 6580          if (ct->ct_power_cnt == 0) {
6583 6581                  i_mdi_rele_all_phci(ct);
6584 6582                  return;
6585 6583          }
6586 6584  }
6587 6585  
6588 6586  static void
6589 6587  i_mdi_pm_reset_client(mdi_client_t *ct)
6590 6588  {
6591 6589          MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6592 6590              "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6593 6591          ASSERT(MDI_CLIENT_LOCKED(ct));
6594 6592          ct->ct_power_cnt = 0;
6595 6593          i_mdi_rele_all_phci(ct);
6596 6594          ct->ct_powercnt_config = 0;
6597 6595          ct->ct_powercnt_unconfig = 0;
6598 6596          ct->ct_powercnt_reset = 1;
6599 6597  }
6600 6598  
6601 6599  static int
6602 6600  i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6603 6601  {
6604 6602          int             ret;
6605 6603          dev_info_t      *ph_dip;
6606 6604  
6607 6605          MDI_PI_LOCK(pip);
6608 6606          i_mdi_pm_hold_pip(pip);
6609 6607  
6610 6608          ph_dip = mdi_pi_get_phci(pip);
6611 6609          MDI_PI_UNLOCK(pip);
6612 6610  
6613 6611          /* bring all components of phci to full power */
6614 6612          MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6615 6613              "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6616 6614              ddi_get_instance(ph_dip), (void *)pip));
6617 6615  
6618 6616          ret = pm_powerup(ph_dip);
6619 6617  
6620 6618          if (ret == DDI_FAILURE) {
6621 6619                  MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6622 6620                      "pm_powerup FAILED for %s%d %p",
6623 6621                      ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6624 6622                      (void *)pip));
6625 6623  
6626 6624                  MDI_PI_LOCK(pip);
6627 6625                  i_mdi_pm_rele_pip(pip);
6628 6626                  MDI_PI_UNLOCK(pip);
6629 6627                  return (MDI_FAILURE);
6630 6628          }
6631 6629  
6632 6630          return (MDI_SUCCESS);
6633 6631  }
6634 6632  
6635 6633  static int
6636 6634  i_mdi_power_all_phci(mdi_client_t *ct)
6637 6635  {
6638 6636          mdi_pathinfo_t  *pip;
6639 6637          int             succeeded = 0;
6640 6638  
6641 6639          ASSERT(MDI_CLIENT_LOCKED(ct));
6642 6640          pip = (mdi_pathinfo_t *)ct->ct_path_head;
6643 6641          while (pip != NULL) {
6644 6642                  /*
6645 6643                   * Don't power if MDI_PATHINFO_STATE_FAULT
6646 6644                   * or MDI_PATHINFO_STATE_OFFLINE.
6647 6645                   */
6648 6646                  if (MDI_PI_IS_INIT(pip) ||
6649 6647                      MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6650 6648                          mdi_hold_path(pip);
6651 6649                          MDI_CLIENT_UNLOCK(ct);
6652 6650                          if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6653 6651                                  succeeded = 1;
6654 6652  
6655 6653                          ASSERT(ct == MDI_PI(pip)->pi_client);
6656 6654                          MDI_CLIENT_LOCK(ct);
6657 6655                          mdi_rele_path(pip);
6658 6656                  }
6659 6657                  pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6660 6658          }
6661 6659  
6662 6660          return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6663 6661  }
6664 6662  
6665 6663  /*
6666 6664   * mdi_bus_power():
6667 6665   *              1. Place the phci(s) into powered up state so that
6668 6666   *                 client can do power management
6669 6667   *              2. Ensure phci powered up as client power managing
6670 6668   * Return Values:
6671 6669   *              MDI_SUCCESS
6672 6670   *              MDI_FAILURE
6673 6671   */
6674 6672  int
6675 6673  mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6676 6674      void *arg, void *result)
6677 6675  {
6678 6676          int                     ret = MDI_SUCCESS;
6679 6677          pm_bp_child_pwrchg_t    *bpc;
6680 6678          mdi_client_t            *ct;
6681 6679          dev_info_t              *cdip;
6682 6680          pm_bp_has_changed_t     *bphc;
6683 6681  
6684 6682          /*
6685 6683           * BUS_POWER_NOINVOL not supported
6686 6684           */
6687 6685          if (op == BUS_POWER_NOINVOL)
6688 6686                  return (MDI_FAILURE);
6689 6687  
6690 6688          /*
6691 6689           * ignore other OPs.
6692 6690           * return quickly to save cou cycles on the ct processing
6693 6691           */
6694 6692          switch (op) {
6695 6693          case BUS_POWER_PRE_NOTIFICATION:
6696 6694          case BUS_POWER_POST_NOTIFICATION:
6697 6695                  bpc = (pm_bp_child_pwrchg_t *)arg;
6698 6696                  cdip = bpc->bpc_dip;
6699 6697                  break;
6700 6698          case BUS_POWER_HAS_CHANGED:
6701 6699                  bphc = (pm_bp_has_changed_t *)arg;
6702 6700                  cdip = bphc->bphc_dip;
6703 6701                  break;
6704 6702          default:
6705 6703                  return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6706 6704          }
6707 6705  
6708 6706          ASSERT(MDI_CLIENT(cdip));
6709 6707  
6710 6708          ct = i_devi_get_client(cdip);
6711 6709          if (ct == NULL)
6712 6710                  return (MDI_FAILURE);
6713 6711  
6714 6712          /*
6715 6713           * wait till the mdi_pathinfo node state change are processed
6716 6714           */
6717 6715          MDI_CLIENT_LOCK(ct);
6718 6716          switch (op) {
6719 6717          case BUS_POWER_PRE_NOTIFICATION:
6720 6718                  MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6721 6719                      "BUS_POWER_PRE_NOTIFICATION:"
6722 6720                      "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6723 6721                      ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6724 6722                      bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6725 6723  
6726 6724                  /* serialize power level change per client */
6727 6725                  while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6728 6726                          cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6729 6727  
6730 6728                  MDI_CLIENT_SET_POWER_TRANSITION(ct);
6731 6729  
6732 6730                  if (ct->ct_power_cnt == 0) {
6733 6731                          ret = i_mdi_power_all_phci(ct);
6734 6732                  }
6735 6733  
6736 6734                  /*
6737 6735                   * if new_level > 0:
6738 6736                   *      - hold phci(s)
6739 6737                   *      - power up phci(s) if not already
6740 6738                   * ignore power down
6741 6739                   */
6742 6740                  if (bpc->bpc_nlevel > 0) {
6743 6741                          if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6744 6742                                  MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6745 6743                                      "i_mdi_pm_hold_client\n"));
6746 6744                                  i_mdi_pm_hold_client(ct, ct->ct_path_count);
6747 6745                          }
6748 6746                  }
6749 6747                  break;
6750 6748          case BUS_POWER_POST_NOTIFICATION:
6751 6749                  MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6752 6750                      "BUS_POWER_POST_NOTIFICATION:"
6753 6751                      "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6754 6752                      ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6755 6753                      bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6756 6754                      *(int *)result));
6757 6755  
6758 6756                  if (*(int *)result == DDI_SUCCESS) {
6759 6757                          if (bpc->bpc_nlevel > 0) {
6760 6758                                  MDI_CLIENT_SET_POWER_UP(ct);
6761 6759                          } else {
6762 6760                                  MDI_CLIENT_SET_POWER_DOWN(ct);
6763 6761                          }
6764 6762                  }
6765 6763  
6766 6764                  /* release the hold we did in pre-notification */
6767 6765                  if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6768 6766                      !DEVI_IS_ATTACHING(ct->ct_dip)) {
6769 6767                          MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6770 6768                              "i_mdi_pm_rele_client\n"));
6771 6769                          i_mdi_pm_rele_client(ct, ct->ct_path_count);
6772 6770                  }
6773 6771  
6774 6772                  if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6775 6773                          /* another thread might started attaching */
6776 6774                          if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6777 6775                                  MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6778 6776                                      "i_mdi_pm_rele_client\n"));
6779 6777                                  i_mdi_pm_rele_client(ct, ct->ct_path_count);
6780 6778                          /* detaching has been taken care in pm_post_unconfig */
6781 6779                          } else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6782 6780                                  MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6783 6781                                      "i_mdi_pm_reset_client\n"));
6784 6782                                  i_mdi_pm_reset_client(ct);
6785 6783                          }
6786 6784                  }
6787 6785  
6788 6786                  MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6789 6787                  cv_broadcast(&ct->ct_powerchange_cv);
6790 6788  
6791 6789                  break;
6792 6790  
6793 6791          /* need to do more */
6794 6792          case BUS_POWER_HAS_CHANGED:
6795 6793                  MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6796 6794                      "BUS_POWER_HAS_CHANGED:"
6797 6795                      "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6798 6796                      ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6799 6797                      bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6800 6798  
6801 6799                  if (bphc->bphc_nlevel > 0 &&
6802 6800                      bphc->bphc_nlevel > bphc->bphc_olevel) {
6803 6801                          if (ct->ct_power_cnt == 0) {
6804 6802                                  ret = i_mdi_power_all_phci(ct);
6805 6803                          }
6806 6804                          MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6807 6805                              "i_mdi_pm_hold_client\n"));
6808 6806                          i_mdi_pm_hold_client(ct, ct->ct_path_count);
6809 6807                  }
6810 6808  
6811 6809                  if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6812 6810                          MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6813 6811                              "i_mdi_pm_rele_client\n"));
6814 6812                          i_mdi_pm_rele_client(ct, ct->ct_path_count);
6815 6813                  }
6816 6814                  break;
6817 6815          }
6818 6816  
6819 6817          MDI_CLIENT_UNLOCK(ct);
6820 6818          return (ret);
6821 6819  }
6822 6820  
6823 6821  static int
6824 6822  i_mdi_pm_pre_config_one(dev_info_t *child)
6825 6823  {
6826 6824          int             ret = MDI_SUCCESS;
6827 6825          mdi_client_t    *ct;
6828 6826  
6829 6827          ct = i_devi_get_client(child);
6830 6828          if (ct == NULL)
6831 6829                  return (MDI_FAILURE);
6832 6830  
6833 6831          MDI_CLIENT_LOCK(ct);
6834 6832          while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6835 6833                  cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6836 6834  
6837 6835          if (!MDI_CLIENT_IS_FAILED(ct)) {
6838 6836                  MDI_CLIENT_UNLOCK(ct);
6839 6837                  MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6840 6838                  return (MDI_SUCCESS);
6841 6839          }
6842 6840  
6843 6841          if (ct->ct_powercnt_config) {
6844 6842                  MDI_CLIENT_UNLOCK(ct);
6845 6843                  MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6846 6844                  return (MDI_SUCCESS);
6847 6845          }
6848 6846  
6849 6847          if (ct->ct_power_cnt == 0) {
6850 6848                  ret = i_mdi_power_all_phci(ct);
6851 6849          }
6852 6850          MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6853 6851          i_mdi_pm_hold_client(ct, ct->ct_path_count);
6854 6852          ct->ct_powercnt_config = 1;
6855 6853          ct->ct_powercnt_reset = 0;
6856 6854          MDI_CLIENT_UNLOCK(ct);
6857 6855          return (ret);
6858 6856  }
6859 6857  
6860 6858  static int
6861 6859  i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6862 6860  {
6863 6861          int                     ret = MDI_SUCCESS;
6864 6862          dev_info_t              *cdip;
6865 6863          int                     circ;
6866 6864  
6867 6865          ASSERT(MDI_VHCI(vdip));
6868 6866  
6869 6867          /* ndi_devi_config_one */
6870 6868          if (child) {
6871 6869                  ASSERT(DEVI_BUSY_OWNED(vdip));
6872 6870                  return (i_mdi_pm_pre_config_one(child));
6873 6871          }
6874 6872  
6875 6873          /* devi_config_common */
6876 6874          ndi_devi_enter(vdip, &circ);
6877 6875          cdip = ddi_get_child(vdip);
6878 6876          while (cdip) {
6879 6877                  dev_info_t *next = ddi_get_next_sibling(cdip);
6880 6878  
6881 6879                  ret = i_mdi_pm_pre_config_one(cdip);
6882 6880                  if (ret != MDI_SUCCESS)
6883 6881                          break;
6884 6882                  cdip = next;
6885 6883          }
6886 6884          ndi_devi_exit(vdip, circ);
6887 6885          return (ret);
6888 6886  }
6889 6887  
6890 6888  static int
6891 6889  i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6892 6890  {
6893 6891          int             ret = MDI_SUCCESS;
6894 6892          mdi_client_t    *ct;
6895 6893  
6896 6894          ct = i_devi_get_client(child);
6897 6895          if (ct == NULL)
6898 6896                  return (MDI_FAILURE);
6899 6897  
6900 6898          MDI_CLIENT_LOCK(ct);
6901 6899          while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6902 6900                  cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6903 6901  
6904 6902          if (!i_ddi_devi_attached(child)) {
6905 6903                  MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6906 6904                  MDI_CLIENT_UNLOCK(ct);
6907 6905                  return (MDI_SUCCESS);
6908 6906          }
6909 6907  
6910 6908          if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6911 6909              (flags & NDI_AUTODETACH)) {
6912 6910                  MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6913 6911                  MDI_CLIENT_UNLOCK(ct);
6914 6912                  return (MDI_FAILURE);
6915 6913          }
6916 6914  
6917 6915          if (ct->ct_powercnt_unconfig) {
6918 6916                  MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6919 6917                  MDI_CLIENT_UNLOCK(ct);
6920 6918                  *held = 1;
6921 6919                  return (MDI_SUCCESS);
6922 6920          }
6923 6921  
6924 6922          if (ct->ct_power_cnt == 0) {
6925 6923                  ret = i_mdi_power_all_phci(ct);
6926 6924          }
6927 6925          MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6928 6926          i_mdi_pm_hold_client(ct, ct->ct_path_count);
6929 6927          ct->ct_powercnt_unconfig = 1;
6930 6928          ct->ct_powercnt_reset = 0;
6931 6929          MDI_CLIENT_UNLOCK(ct);
6932 6930          if (ret == MDI_SUCCESS)
6933 6931                  *held = 1;
6934 6932          return (ret);
6935 6933  }
6936 6934  
6937 6935  static int
6938 6936  i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6939 6937      int flags)
6940 6938  {
6941 6939          int                     ret = MDI_SUCCESS;
6942 6940          dev_info_t              *cdip;
6943 6941          int                     circ;
6944 6942  
6945 6943          ASSERT(MDI_VHCI(vdip));
6946 6944          *held = 0;
6947 6945  
6948 6946          /* ndi_devi_unconfig_one */
6949 6947          if (child) {
6950 6948                  ASSERT(DEVI_BUSY_OWNED(vdip));
6951 6949                  return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6952 6950          }
6953 6951  
6954 6952          /* devi_unconfig_common */
6955 6953          ndi_devi_enter(vdip, &circ);
6956 6954          cdip = ddi_get_child(vdip);
6957 6955          while (cdip) {
6958 6956                  dev_info_t *next = ddi_get_next_sibling(cdip);
6959 6957  
6960 6958                  ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6961 6959                  cdip = next;
6962 6960          }
6963 6961          ndi_devi_exit(vdip, circ);
6964 6962  
6965 6963          if (*held)
6966 6964                  ret = MDI_SUCCESS;
6967 6965  
6968 6966          return (ret);
6969 6967  }
6970 6968  
6971 6969  static void
6972 6970  i_mdi_pm_post_config_one(dev_info_t *child)
6973 6971  {
6974 6972          mdi_client_t    *ct;
6975 6973  
6976 6974          ct = i_devi_get_client(child);
6977 6975          if (ct == NULL)
6978 6976                  return;
6979 6977  
6980 6978          MDI_CLIENT_LOCK(ct);
6981 6979          while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6982 6980                  cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6983 6981  
6984 6982          if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6985 6983                  MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6986 6984                  MDI_CLIENT_UNLOCK(ct);
6987 6985                  return;
6988 6986          }
6989 6987  
6990 6988          /* client has not been updated */
6991 6989          if (MDI_CLIENT_IS_FAILED(ct)) {
6992 6990                  MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
6993 6991                  MDI_CLIENT_UNLOCK(ct);
6994 6992                  return;
6995 6993          }
6996 6994  
6997 6995          /* another thread might have powered it down or detached it */
6998 6996          if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6999 6997              !DEVI_IS_ATTACHING(child)) ||
7000 6998              (!i_ddi_devi_attached(child) &&
7001 6999              !DEVI_IS_ATTACHING(child))) {
7002 7000                  MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7003 7001                  i_mdi_pm_reset_client(ct);
7004 7002          } else {
7005 7003                  mdi_pathinfo_t  *pip, *next;
7006 7004                  int     valid_path_count = 0;
7007 7005  
7008 7006                  MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7009 7007                  pip = ct->ct_path_head;
7010 7008                  while (pip != NULL) {
7011 7009                          MDI_PI_LOCK(pip);
7012 7010                          next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7013 7011                          if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7014 7012                                  valid_path_count ++;
7015 7013                          MDI_PI_UNLOCK(pip);
7016 7014                          pip = next;
7017 7015                  }
7018 7016                  i_mdi_pm_rele_client(ct, valid_path_count);
7019 7017          }
7020 7018          ct->ct_powercnt_config = 0;
7021 7019          MDI_CLIENT_UNLOCK(ct);
7022 7020  }
7023 7021  
/*
 * i_mdi_pm_post_config():
 *              Run i_mdi_pm_post_config_one() on the given child of the
 *              vHCI vdip, or on every child of vdip when child is NULL.
 */
7024 7022  static void
7025 7023  i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7026 7024  {
7027 7025          int             circ;
7028 7026          dev_info_t      *cdip;
7029 7027  
7030 7028          ASSERT(MDI_VHCI(vdip));
7031 7029  
7032 7030          /* ndi_devi_config_one: only the one named child is handled */
7033 7031          if (child) {
                           /* vdip is not entered here; it must already be busy-owned */
7034 7032                  ASSERT(DEVI_BUSY_OWNED(vdip));
7035 7033                  i_mdi_pm_post_config_one(child);
7036 7034                  return;
7037 7035          }
7038 7036  
7039 7037          /* devi_config_common: walk all children with vdip entered */
7040 7038          ndi_devi_enter(vdip, &circ);
7041 7039          cdip = ddi_get_child(vdip);
7042 7040          while (cdip) {
                           /* sample the sibling link before cdip is processed */
7043 7041                  dev_info_t *next = ddi_get_next_sibling(cdip);
7044 7042  
7045 7043                  i_mdi_pm_post_config_one(cdip);
7046 7044                  cdip = next;
7047 7045          }
7048 7046          ndi_devi_exit(vdip, circ);
7049 7047  }
7050 7048  
/*
 * i_mdi_pm_post_unconfig_one():
 *              Post-unconfig power handling for one child of the vHCI.
 *              Returns without action when the child has no mdi client,
 *              when ct_powercnt_unconfig is zero, or when ct_powercnt_reset
 *              is set.  Otherwise the client power state is either reset
 *              or released for the number of ONLINE/STANDBY paths.
 */
7051 7049  static void
7052 7050  i_mdi_pm_post_unconfig_one(dev_info_t *child)
7053 7051  {
7054 7052          mdi_client_t    *ct;
7055 7053  
7056 7054          ct = i_devi_get_client(child);
7057 7055          if (ct == NULL)
7058 7056                  return;
7059 7057  
7060 7058          MDI_CLIENT_LOCK(ct);
                   /* wait (on ct_powerchange_cv) until no power transition is pending */
7061 7059          while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7062 7060                  cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7063 7061  
                   /* nothing to undo: no unconfig hold recorded, or a reset is flagged */
7064 7062          if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7065 7063                  MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7066 7064                  MDI_CLIENT_UNLOCK(ct);
7067 7065                  return;
7068 7066          }
7069 7067  
7070 7068          /* failure detaching or another thread just attached it */
7071 7069          if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7072 7070              i_ddi_devi_attached(child)) ||
7073 7071              (!i_ddi_devi_attached(child) &&
7074 7072              !DEVI_IS_ATTACHING(child))) {
7075 7073                  MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
                           /*
                            * NOTE(review): ct_powercnt_unconfig is not cleared on
                            * this path; presumably i_mdi_pm_reset_client() does
                            * that - confirm.
                            */
7076 7074                  i_mdi_pm_reset_client(ct);
7077 7075          } else {
7078 7076                  mdi_pathinfo_t  *pip, *next;
7079 7077                  int     valid_path_count = 0;
7080 7078  
7081 7079                  MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
                           /* count the client's paths that are ONLINE or STANDBY */
7082 7080                  pip = ct->ct_path_head;
7083 7081                  while (pip != NULL) {
7084 7082                          MDI_PI_LOCK(pip);
7085 7083                          next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7086 7084                          if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7087 7085                                  valid_path_count ++;
7088 7086                          MDI_PI_UNLOCK(pip);
7089 7087                          pip = next;
7090 7088                  }
7091 7089                  i_mdi_pm_rele_client(ct, valid_path_count);
7092 7090                  ct->ct_powercnt_unconfig = 0;
7093 7091          }
7094 7092  
7095 7093          MDI_CLIENT_UNLOCK(ct);
7096 7094  }
7097 7095  
/*
 * i_mdi_pm_post_unconfig():
 *              Run i_mdi_pm_post_unconfig_one() on the given child of the
 *              vHCI vdip, or on every child of vdip when child is NULL.
 *              Nothing is done when held is zero.
 */
7098 7096  static void
7099 7097  i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7100 7098  {
7101 7099          int                     circ;
7102 7100          dev_info_t              *cdip;
7103 7101  
7104 7102          ASSERT(MDI_VHCI(vdip));
7105 7103  
7106 7104          if (!held) {
7107 7105                  MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7108 7106                  return;
7109 7107          }
7110 7108  
                   /* single child: vdip is not entered here, it must be busy-owned */
7111 7109          if (child) {
7112 7110                  ASSERT(DEVI_BUSY_OWNED(vdip));
7113 7111                  i_mdi_pm_post_unconfig_one(child);
7114 7112                  return;
7115 7113          }
7116 7114  
                   /* no child given: walk all children with vdip entered */
7117 7115          ndi_devi_enter(vdip, &circ);
7118 7116          cdip = ddi_get_child(vdip);
7119 7117          while (cdip) {
                           /* sample the sibling link before cdip is processed */
7120 7118                  dev_info_t *next = ddi_get_next_sibling(cdip);
7121 7119  
7122 7120                  i_mdi_pm_post_unconfig_one(cdip);
7123 7121                  cdip = next;
7124 7122          }
7125 7123          ndi_devi_exit(vdip, circ);
7126 7124  }
7127 7125  
/*
 * mdi_power():
 *              Dispatch the power management operation op for the vHCI vdip.
 *
 *              args is interpreted per op: an int pointer for
 *              MDI_PM_PRE_UNCONFIG (passed through) and MDI_PM_POST_UNCONFIG
 *              (dereferenced), and the client dev_info_t pointer for
 *              MDI_PM_HOLD_POWER / MDI_PM_RELE_POWER.
 *
 *              devnm, when non-NULL, names the child to look up under vdip;
 *              vdip stays entered for the duration of the call in that case.
 *
 * Return Values:
 *              The result of the pre-config / pre-unconfig handler for those
 *              two ops, MDI_SUCCESS for every other op.
 */
7128 7126  int
7129 7127  mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7130 7128  {
7131 7129          int                     circ, ret = MDI_SUCCESS;
7132 7130          dev_info_t              *client_dip = NULL;
7133 7131          mdi_client_t            *ct;
7134 7132  
7135 7133          /*
7136 7134           * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7137 7135           * Power up pHCI for the named client device.
7138 7136           * Note: Before the client is enumerated under vhci by phci,
7139 7137           * client_dip can be NULL. Then proceed to power up all the
7140 7138           * pHCIs.
7141 7139           */
7142 7140          if (devnm != NULL) {
7143 7141                  ndi_devi_enter(vdip, &circ);
7144 7142                  client_dip = ndi_devi_findchild(vdip, devnm);
7145 7143          }
7146 7144  
7147 7145          MDI_DEBUG(4, (MDI_NOTE, vdip,
7148 7146              "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7149 7147  
7150 7148          switch (op) {
7151 7149          case MDI_PM_PRE_CONFIG:
7152 7150                  ret = i_mdi_pm_pre_config(vdip, client_dip);
7153 7151                  break;
7154 7152  
7155 7153          case MDI_PM_PRE_UNCONFIG:
7156 7154                  ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7157 7155                      flags);
7158 7156                  break;
7159 7157  
7160 7158          case MDI_PM_POST_CONFIG:
7161 7159                  i_mdi_pm_post_config(vdip, client_dip);
7162 7160                  break;
7163 7161  
7164 7162          case MDI_PM_POST_UNCONFIG:
                           /* args is dereferenced unconditionally: it must be non-NULL */
7165 7163                  i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7166 7164                  break;
7167 7165  
7168 7166          case MDI_PM_HOLD_POWER:
7169 7167          case MDI_PM_RELE_POWER:
7170 7168                  ASSERT(args);
7171 7169  
                           /* for these ops the client comes from args, not from devnm */
7172 7170                  client_dip = (dev_info_t *)args;
7173 7171                  ASSERT(MDI_CLIENT(client_dip));
7174 7172  
7175 7173                  ct = i_devi_get_client(client_dip);
7176 7174                  MDI_CLIENT_LOCK(ct);
7177 7175  
7178 7176                  if (op == MDI_PM_HOLD_POWER) {
                                   /*
                                    * Only when ct_power_cnt is zero: power all
                                    * pHCIs, then hold for ct_path_count paths.
                                    */
7179 7177                          if (ct->ct_power_cnt == 0) {
7180 7178                                  (void) i_mdi_power_all_phci(ct);
7181 7179                                  MDI_DEBUG(4, (MDI_NOTE, client_dip,
7182 7180                                      "i_mdi_pm_hold_client\n"));
7183 7181                                  i_mdi_pm_hold_client(ct, ct->ct_path_count);
7184 7182                          }
7185 7183                  } else {
                                   /* release while attaching, otherwise reset the client */
7186 7184                          if (DEVI_IS_ATTACHING(client_dip)) {
7187 7185                                  MDI_DEBUG(4, (MDI_NOTE, client_dip,
7188 7186                                      "i_mdi_pm_rele_client\n"));
7189 7187                                  i_mdi_pm_rele_client(ct, ct->ct_path_count);
7190 7188                          } else {
7191 7189                                  MDI_DEBUG(4, (MDI_NOTE, client_dip,
7192 7190                                      "i_mdi_pm_reset_client\n"));
7193 7191                                  i_mdi_pm_reset_client(ct);
7194 7192                          }
7195 7193                  }
7196 7194  
7197 7195                  MDI_CLIENT_UNLOCK(ct);
7198 7196                  break;
7199 7197  
7200 7198          default:
7201 7199                  break;
7202 7200          }
7203 7201  
                   /* pairs with the ndi_devi_enter() done above when devnm was given */
7204 7202          if (devnm)
7205 7203                  ndi_devi_exit(vdip, circ);
7206 7204  
7207 7205          return (ret);
7208 7206  }
7209 7207  
/*
 * mdi_component_is_vhci():
 *              Return MDI_SUCCESS if dip passes the MDI_VHCI() test, else
 *              MDI_FAILURE.  On success, when mdi_class is non-NULL, the
 *              vHCI class name (vh_class) is stored through it.
 */
7210 7208  int
7211 7209  mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7212 7210  {
7213 7211          mdi_vhci_t *vhci;
7214 7212  
7215 7213          if (!MDI_VHCI(dip))
7216 7214                  return (MDI_FAILURE);
7217 7215  
7218 7216          if (mdi_class) {
7219 7217                  vhci = DEVI(dip)->devi_mdi_xhci;
7220 7218                  ASSERT(vhci);
7221 7219                  *mdi_class = vhci->vh_class;
7222 7220          }
7223 7221  
7224 7222          return (MDI_SUCCESS);
7225 7223  }
7226 7224  
/*
 * mdi_component_is_phci():
 *              Return MDI_SUCCESS if dip passes the MDI_PHCI() test, else
 *              MDI_FAILURE.  On success, when mdi_class is non-NULL, the
 *              class name of the pHCI's vHCI (ph_vhci->vh_class) is stored
 *              through it.
 */
7227 7225  int
7228 7226  mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7229 7227  {
7230 7228          mdi_phci_t *phci;
7231 7229  
7232 7230          if (!MDI_PHCI(dip))
7233 7231                  return (MDI_FAILURE);
7234 7232  
7235 7233          if (mdi_class) {
7236 7234                  phci = DEVI(dip)->devi_mdi_xhci;
7237 7235                  ASSERT(phci);
7238 7236                  *mdi_class = phci->ph_vhci->vh_class;
7239 7237          }
7240 7238  
7241 7239          return (MDI_SUCCESS);
7242 7240  }
7243 7241  
/*
 * mdi_component_is_client():
 *              Return MDI_SUCCESS if dip passes the MDI_CLIENT() test, else
 *              MDI_FAILURE.  On success, when mdi_class is non-NULL, the
 *              class name of the client's vHCI (ct_vhci->vh_class) is stored
 *              through it.
 */
7244 7242  int
7245 7243  mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7246 7244  {
7247 7245          mdi_client_t *client;
7248 7246  
7249 7247          if (!MDI_CLIENT(dip))
7250 7248                  return (MDI_FAILURE);
7251 7249  
7252 7250          if (mdi_class) {
7253 7251                  client = DEVI(dip)->devi_mdi_client;
7254 7252                  ASSERT(client);
7255 7253                  *mdi_class = client->ct_vhci->vh_class;
7256 7254          }
7257 7255  
7258 7256          return (MDI_SUCCESS);
7259 7257  }
7260 7258  
/*
 * mdi_client_get_vhci_private():
 *              Get the vhci private information (ct_vprivate) of the mdi
 *              client associated with dip.  dip is asserted to be a client;
 *              where the ASSERT is compiled out, NULL is returned for a
 *              non-client dip.
 */
7261 7259  void *
7262 7260  mdi_client_get_vhci_private(dev_info_t *dip)
7263 7261  {
7264 7262          ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7265 7263          if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7266 7264                  mdi_client_t    *ct;
7267 7265                  ct = i_devi_get_client(dip);
7268 7266                  return (ct->ct_vprivate);
7269 7267          }
7270 7268          return (NULL);
7271 7269  }
7272 7270  
/*
 * mdi_client_set_vhci_private():
 *              Set the vhci private information (ct_vprivate) of the mdi
 *              client associated with dip.  dip is asserted to be a client;
 *              where the ASSERT is compiled out, a non-client dip is
 *              silently ignored.
 */
7273 7271  void
7274 7272  mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7275 7273  {
7276 7274          ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7277 7275          if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7278 7276                  mdi_client_t    *ct;
7279 7277                  ct = i_devi_get_client(dip);
7280 7278                  ct->ct_vprivate = data;
7281 7279          }
7282 7280  }
7283 7281  /*
7284 7282   * mdi_pi_get_vhci_private():
7285 7283   *              Get the vhci private information associated with the
7286 7284   *              mdi_pathinfo node
 *
 * Return Values:
 *              pi_vprivate of pip, or NULL when pip is NULL
7287 7285   */
7288 7286  void *
7289 7287  mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7290 7288  {
7291 7289          caddr_t vprivate = NULL;
7292 7290          if (pip) {
7293 7291                  vprivate = MDI_PI(pip)->pi_vprivate;
7294 7292          }
7295 7293          return (vprivate);
7296 7294  }
7297 7295  
7298 7296  /*
7299 7297   * mdi_pi_set_vhci_private():
7300 7298   *              Set the vhci private information in the mdi_pathinfo node
 *              (pi_vprivate).  A NULL pip is ignored.
7301 7299   */
7302 7300  void
7303 7301  mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7304 7302  {
7305 7303          if (pip) {
7306 7304                  MDI_PI(pip)->pi_vprivate = priv;
7307 7305          }
7308 7306  }
7309 7307  
7310 7308  /*
7311 7309   * mdi_phci_get_vhci_private():
7312 7310   *              Get the vhci private information associated with the
7313 7311   *              mdi_phci node
 *
 * Return Values:
 *              ph_vprivate of the pHCI bound to dip.  dip is asserted to be
 *              a pHCI; where the ASSERT is compiled out, NULL is returned
 *              for a non-pHCI dip.
7314 7312   */
7315 7313  void *
7316 7314  mdi_phci_get_vhci_private(dev_info_t *dip)
7317 7315  {
7318 7316          ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7319 7317          if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7320 7318                  mdi_phci_t      *ph;
7321 7319                  ph = i_devi_get_phci(dip);
7322 7320                  return (ph->ph_vprivate);
7323 7321          }
7324 7322          return (NULL);
7325 7323  }
7326 7324  
7327 7325  /*
7328 7326   * mdi_phci_set_vhci_private():
7329 7327   *              Set the vhci private information in the mdi_phci node
 *              (ph_vprivate).  dip is asserted to be a pHCI; where the
 *              ASSERT is compiled out, a non-pHCI dip is silently ignored.
7330 7328   */
7331 7329  void
7332 7330  mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7333 7331  {
7334 7332          ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7335 7333          if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7336 7334                  mdi_phci_t      *ph;
7337 7335                  ph = i_devi_get_phci(dip);
7338 7336                  ph->ph_vprivate = priv;
7339 7337          }
7340 7338  }
7341 7339  
/*
 * mdi_pi_ishidden():
 *              Return the result of the MDI_PI_FLAGS_IS_HIDDEN() test on pip.
 *              No lock is taken here and pip is not checked for NULL.
 */
7342 7340  int
7343 7341  mdi_pi_ishidden(mdi_pathinfo_t *pip)
7344 7342  {
7345 7343          return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7346 7344  }
7347 7345  
/*
 * mdi_pi_device_isremoved():
 *              Return the result of the MDI_PI_FLAGS_IS_DEVICE_REMOVED() test
 *              on pip.  No lock is taken here and pip is not checked for
 *              NULL.
 */
7348 7346  int
7349 7347  mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7350 7348  {
7351 7349          return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7352 7350  }
7353 7351  
7354 7352  /* Return 1 if all client paths are device_removed */
/*
 * The client's path list is walked with the client lock held; the walk
 * stops at the first path that is not device_removed and 0 is returned.
 * A client with an empty path list also yields 1.
 */
7355 7353  static int
7356 7354  i_mdi_client_all_devices_removed(mdi_client_t *ct)
7357 7355  {
7358 7356          mdi_pathinfo_t  *pip;
7359 7357          int             all_devices_removed = 1;
7360 7358  
7361 7359          MDI_CLIENT_LOCK(ct);
7362 7360          for (pip = ct->ct_path_head; pip;
7363 7361              pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7364 7362                  if (!mdi_pi_device_isremoved(pip)) {
7365 7363                          all_devices_removed = 0;
7366 7364                          break;
7367 7365                  }
7368 7366          }
7369 7367          MDI_CLIENT_UNLOCK(ct);
7370 7368          return (all_devices_removed);
7371 7369  }
7372 7370  
7373 7371  /*
7374 7372   * When processing path hotunplug, represent device removal.
 *
 * Marks pip DEVICE_REMOVED and HIDDEN under the pathinfo lock.
 *
 * Return Values:
 *              0 if pip was already marked DEVICE_REMOVED (nothing changed)
 *              1 if pip was newly marked
7375 7373   */
7376 7374  int
7377 7375  mdi_pi_device_remove(mdi_pathinfo_t *pip)
7378 7376  {
7379 7377          mdi_client_t    *ct;
7380 7378  
7381 7379          MDI_PI_LOCK(pip);
7382 7380          if (mdi_pi_device_isremoved(pip)) {
7383 7381                  MDI_PI_UNLOCK(pip);
7384 7382                  return (0);
7385 7383          }
7386 7384          MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7387 7385          MDI_PI_FLAGS_SET_HIDDEN(pip);
7388 7386          MDI_PI_UNLOCK(pip);
7389 7387  
7390 7388          /*
7391 7389           * If all paths associated with the client are now DEVICE_REMOVED,
7392 7390           * reflect DEVICE_REMOVED in the client.
 *
 * Otherwise (including when pip has no client or the client has
 * no dip) only the devinfo snapshot cache is invalidated.
7393 7391           */
                   /* pi_client is read after the pathinfo lock has been dropped */
7394 7392          ct = MDI_PI(pip)->pi_client;
7395 7393          if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7396 7394                  (void) ndi_devi_device_remove(ct->ct_dip);
7397 7395          else
7398 7396                  i_ddi_di_cache_invalidate();
7399 7397  
7400 7398          return (1);
7401 7399  }
7402 7400  
7403 7401  /*
7404 7402   * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7405 7403   * is now accessible then this interface is used to represent device insertion.
 *
 * Clears DEVICE_REMOVED and HIDDEN on pip under the pathinfo lock, then
 * invalidates the devinfo snapshot cache.
 *
 * Return Values:
 *              0 if pip was not marked DEVICE_REMOVED (nothing changed)
 *              1 if the marks were cleared
7406 7404   */
7407 7405  int
7408 7406  mdi_pi_device_insert(mdi_pathinfo_t *pip)
7409 7407  {
7410 7408          MDI_PI_LOCK(pip);
7411 7409          if (!mdi_pi_device_isremoved(pip)) {
7412 7410                  MDI_PI_UNLOCK(pip);
7413 7411                  return (0);
7414 7412          }
7415 7413          MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7416 7414          MDI_PI_FLAGS_CLR_HIDDEN(pip);
7417 7415          MDI_PI_UNLOCK(pip);
7418 7416  
7419 7417          i_ddi_di_cache_invalidate();
7420 7418  
7421 7419          return (1);
7422 7420  }
7423 7421  
7424 7422  /*
7425 7423   * List of vhci class names:
7426 7424   * A vhci class name must be in this list only if the corresponding vhci
7427 7425   * driver intends to use the mdi provided bus config implementation
7428 7426   * (i.e., mdi_vhci_bus_config()).
7429 7427   */
7430 7428  static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
           /* number of entries in vhci_class_list[] */
7431 7429  #define N_VHCI_CLASSES  (sizeof (vhci_class_list) / sizeof (char *))
7432 7430  
7433 7431  /*
7434 7432   * During boot time, the on-disk vhci cache for every vhci class is read
7435 7433   * in the form of an nvlist and stored here.
 * The array is indexed in parallel with vhci_class_list[]; a slot is
 * set back to NULL once setup_vhci_cache() has taken its nvlist.
7436 7434   */
7437 7435  static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7438 7436  
7439 7437  /* cache version number and nvpair names in vhci cache nvlist */
7440 7438  #define MDI_VHCI_CACHE_VERSION  1
7441 7439  #define MDI_NVPNAME_VERSION     "version"
7442 7440  #define MDI_NVPNAME_PHCIS       "phcis"
7443 7441  #define MDI_NVPNAME_CTADDRMAP   "clientaddrmap"
7444 7442  
7445 7443  /*
7446 7444   * Given vhci class name, return its on-disk vhci cache filename.
7447 7445   * Memory for the returned filename which includes the full path is allocated
7448 7446   * by this function.
 * The allocation is exactly strlen(filename) + 1 bytes (asserted below),
 * which is the size destroy_vhci_cache() passes to kmem_free().
7449 7447   */
7450 7448  static char *
7451 7449  vhclass2vhcache_filename(char *vhclass)
7452 7450  {
7453 7451          char *filename;
7454 7452          int len;
7455 7453          static char *fmt = "/etc/devices/mdi_%s_cache";
7456 7454  
7457 7455          /*
7458 7456           * fmt contains the on-disk vhci cache file name format;
7459 7457           * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7460 7458           */
7461 7459  
                   /*
                    * strlen(fmt) counts the two characters of "%s", which are
                    * replaced by vhclass; one byte is needed for the terminating
                    * NUL, hence the net -1 below.
                    */
7462 7460          /* the -1 below is to account for "%s" in the format string */
7463 7461          len = strlen(fmt) + strlen(vhclass) - 1;
7464 7462          filename = kmem_alloc(len, KM_SLEEP);
7465 7463          (void) snprintf(filename, len, fmt, vhclass);
7466 7464          ASSERT(len == (strlen(filename) + 1));
7467 7465          return (filename);
7468 7466  }
7469 7467  
7470 7468  /*
7471 7469   * initialize the vhci cache related data structures and read the on-disk
7472 7470   * vhci cached data into memory.
 *
 * Allocates the mdi_vhci_config_t for vh and stores it in vh->vh_config,
 * initializes its locks, cv and client hash, loads the cached nvlist (if
 * any) into the in-core cache, and registers stop_vhcache_flush_thread()
 * as a callb.
7473 7471   */
7474 7472  static void
7475 7473  setup_vhci_cache(mdi_vhci_t *vh)
7476 7474  {
7477 7475          mdi_vhci_config_t *vhc;
7478 7476          mdi_vhci_cache_t *vhcache;
7479 7477          int i;
7480 7478          nvlist_t *nvl = NULL;
7481 7479  
7482 7480          vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7483 7481          vh->vh_config = vhc;
7484 7482          vhcache = &vhc->vhc_vhcache;
7485 7483  
7486 7484          vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7487 7485  
7488 7486          mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7489 7487          cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7490 7488  
7491 7489          rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7492 7490  
7493 7491          /*
7494 7492           * Create string hash; same as mod_hash_create_strhash() except that
7495 7493           * we use NULL key destructor.
7496 7494           */
7497 7495          vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7498 7496              mdi_bus_config_cache_hash_size,
7499 7497              mod_hash_null_keydtor, mod_hash_null_valdtor,
7500 7498              mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7501 7499  
7502 7500          /*
7503 7501           * The on-disk vhci cache is read during booting prior to the
7504 7502           * lights-out period by mdi_read_devices_files().
 *
 * Take the nvlist saved for this class, if any, and clear the
 * slot so that the nvlist is consumed (and freed below) only once.
7505 7503           */
7506 7504          for (i = 0; i < N_VHCI_CLASSES; i++) {
7507 7505                  if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7508 7506                          nvl = vhcache_nvl[i];
7509 7507                          vhcache_nvl[i] = NULL;
7510 7508                          break;
7511 7509                  }
7512 7510          }
7513 7511  
7514 7512          /*
7515 7513           * this is to cover the case of some one manually causing unloading
7516 7514           * (or detaching) and reloading (or attaching) of a vhci driver.
7517 7515           */
7518 7516          if (nvl == NULL && modrootloaded)
7519 7517                  nvl = read_on_disk_vhci_cache(vh->vh_class);
7520 7518  
7521 7519          if (nvl != NULL) {
7522 7520                  rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7523 7521                  if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7524 7522                          vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7525 7523                  else  {
7526 7524                          cmn_err(CE_WARN,
7527 7525                              "%s: data file corrupted, will recreate",
7528 7526                              vhc->vhc_vhcache_filename);
7529 7527                  }
7530 7528                  rw_exit(&vhcache->vhcache_lock);
                           /* the nvlist is freed whether or not the conversion succeeded */
7531 7529                  nvlist_free(nvl);
7532 7530          }
7533 7531  
                   /* the returned id is what stop_vhcache_async_threads() callb_delete()s */
7534 7532          vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7535 7533              CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7536 7534  
7537 7535          vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7538 7536          vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7539 7537  }
7540 7538  
7541 7539  /*
7542 7540   * free all vhci cache related resources
 *
 * Return Values:
 *              MDI_FAILURE if stop_vhcache_async_threads() fails; nothing is
 *              freed in that case.
 *              MDI_SUCCESS once the filename, client hash, cached phci,
 *              client and pathinfo entries, locks, cv and the
 *              mdi_vhci_config_t itself have been released.
7543 7541   */
7544 7542  static int
7545 7543  destroy_vhci_cache(mdi_vhci_t *vh)
7546 7544  {
7547 7545          mdi_vhci_config_t *vhc = vh->vh_config;
7548 7546          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7549 7547          mdi_vhcache_phci_t *cphci, *cphci_next;
7550 7548          mdi_vhcache_client_t *cct, *cct_next;
7551 7549          mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7552 7550  
7553 7551          if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7554 7552                  return (MDI_FAILURE);
7555 7553  
                   /* size matches the allocation made by vhclass2vhcache_filename() */
7556 7554          kmem_free(vhc->vhc_vhcache_filename,
7557 7555              strlen(vhc->vhc_vhcache_filename) + 1);
7558 7556  
7559 7557          mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7560 7558  
                   /* each list walk saves the next pointer before freeing the entry */
7561 7559          for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7562 7560              cphci = cphci_next) {
7563 7561                  cphci_next = cphci->cphci_next;
7564 7562                  free_vhcache_phci(cphci);
7565 7563          }
7566 7564  
7567 7565          for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7568 7566                  cct_next = cct->cct_next;
7569 7567                  for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7570 7568                          cpi_next = cpi->cpi_next;
7571 7569                          free_vhcache_pathinfo(cpi);
7572 7570                  }
7573 7571                  free_vhcache_client(cct);
7574 7572          }
7575 7573  
7576 7574          rw_destroy(&vhcache->vhcache_lock);
7577 7575  
7578 7576          mutex_destroy(&vhc->vhc_lock);
7579 7577          cv_destroy(&vhc->vhc_cv);
7580 7578          kmem_free(vhc, sizeof (mdi_vhci_config_t));
7581 7579          return (MDI_SUCCESS);
7582 7580  }
7583 7581  
7584 7582  /*
7585 7583   * Stop all vhci cache related async threads and free their resources.
 *
 * Return Values:
 *              MDI_FAILURE if a dirty cache could not be flushed (the cache
 *              is marked dirty again and the callb is left registered), or
 *              if callb_delete() fails.
 *              MDI_SUCCESS otherwise.
7586 7584   */
7587 7585  static int
7588 7586  stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7589 7587  {
7590 7588          mdi_async_client_config_t *acc, *acc_next;
7591 7589  
7592 7590          mutex_enter(&vhc->vhc_lock);
                   /* raise the exit flag and wake anything sleeping on vhc_cv */
7593 7591          vhc->vhc_flags |= MDI_VHC_EXIT;
7594 7592          ASSERT(vhc->vhc_acc_thrcount >= 0);
7595 7593          cv_broadcast(&vhc->vhc_cv);
7596 7594  
                   /*
                    * Poll, with vhc_lock dropped across each delay, until the
                    * flush-thread flag is clear and vhc_acc_thrcount is zero.
                    */
7597 7595          while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7598 7596              vhc->vhc_acc_thrcount != 0) {
7599 7597                  mutex_exit(&vhc->vhc_lock);
7600 7598                  delay_random(mdi_delay);
7601 7599                  mutex_enter(&vhc->vhc_lock);
7602 7600          }
7603 7601  
7604 7602          vhc->vhc_flags &= ~MDI_VHC_EXIT;
7605 7603  
                   /* discard every queued async client config request */
7606 7604          for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7607 7605                  acc_next = acc->acc_next;
7608 7606                  free_async_client_config(acc);
7609 7607          }
7610 7608          vhc->vhc_acc_list_head = NULL;
7611 7609          vhc->vhc_acc_list_tail = NULL;
7612 7610          vhc->vhc_acc_count = 0;
7613 7611  
                   /* flush a dirty cache; vhc_lock is released before flush_vhcache() */
7614 7612          if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7615 7613                  vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7616 7614                  mutex_exit(&vhc->vhc_lock);
7617 7615                  if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7618 7616                          vhcache_dirty(vhc);
7619 7617                          return (MDI_FAILURE);
7620 7618                  }
7621 7619          } else
7622 7620                  mutex_exit(&vhc->vhc_lock);
7623 7621  
7624 7622          if (callb_delete(vhc->vhc_cbid) != 0)
7625 7623                  return (MDI_FAILURE);
7626 7624  
7627 7625          return (MDI_SUCCESS);
7628 7626  }
7629 7627  
7630 7628  /*
7631 7629   * Stop vhci cache flush thread
 *
 * Registered with callb_add() by setup_vhci_cache(); arg is the
 * mdi_vhci_config_t and code is unused.  Sets MDI_VHC_EXIT (which is not
 * cleared here), waits for the flush-thread flag to clear, then flushes
 * the cache once if it is dirty.  Always returns B_TRUE; the result of
 * that final flush is ignored.
7632 7630   */
7633 7631  /* ARGSUSED */
7634 7632  static boolean_t
7635 7633  stop_vhcache_flush_thread(void *arg, int code)
7636 7634  {
7637 7635          mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7638 7636  
7639 7637          mutex_enter(&vhc->vhc_lock);
7640 7638          vhc->vhc_flags |= MDI_VHC_EXIT;
7641 7639          cv_broadcast(&vhc->vhc_cv);
7642 7640  
                   /* poll with vhc_lock dropped across each delay */
7643 7641          while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7644 7642                  mutex_exit(&vhc->vhc_lock);
7645 7643                  delay_random(mdi_delay);
7646 7644                  mutex_enter(&vhc->vhc_lock);
7647 7645          }
7648 7646  
7649 7647          if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7650 7648                  vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7651 7649                  mutex_exit(&vhc->vhc_lock);
7652 7650                  (void) flush_vhcache(vhc, 1);
7653 7651          } else
7654 7652                  mutex_exit(&vhc->vhc_lock);
7655 7653  
7656 7654          return (B_TRUE);
7657 7655  }
7658 7656  
7659 7657  /*
7660 7658   * Enqueue the vhcache phci (cphci) at the tail of the list
 * (vhcache_phci_head/vhcache_phci_tail).  No lock is taken here.
7661 7659   */
7662 7660  static void
7663 7661  enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7664 7662  {
7665 7663          cphci->cphci_next = NULL;
                   /* an empty list is detected by a NULL head; the tail is then unused */
7666 7664          if (vhcache->vhcache_phci_head == NULL)
7667 7665                  vhcache->vhcache_phci_head = cphci;
7668 7666          else
7669 7667                  vhcache->vhcache_phci_tail->cphci_next = cphci;
7670 7668          vhcache->vhcache_phci_tail = cphci;
7671 7669  }
7672 7670  
7673 7671  /*
7674 7672   * Enqueue the vhcache pathinfo (cpi) at the tail of the list
 * (cct_cpi_head/cct_cpi_tail) of the vhcache client cct.
7675 7673   */
7676 7674  static void
7677 7675  enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7678 7676      mdi_vhcache_pathinfo_t *cpi)
7679 7677  {
7680 7678          cpi->cpi_next = NULL;
                   /* an empty list is detected by a NULL head; the tail is then unused */
7681 7679          if (cct->cct_cpi_head == NULL)
7682 7680                  cct->cct_cpi_head = cpi;
7683 7681          else
7684 7682                  cct->cct_cpi_tail->cpi_next = cpi;
7685 7683          cct->cct_cpi_tail = cpi;
7686 7684  }
7687 7685  
7688 7686  /*
7689 7687   * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7690 7688   * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7691 7689   * flag set come at the beginning of the list. All cpis which have this
7692 7690   * flag set come at the end of the list.
 *
 * A newcpi with the flag set is appended at the tail; a newcpi without
 * it is inserted just before the first flagged entry, i.e. after all
 * existing unflagged entries.
7693 7691   */
7694 7692  static void
7695 7693  enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7696 7694      mdi_vhcache_pathinfo_t *newcpi)
7697 7695  {
7698 7696          mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7699 7697  
7700 7698          if (cct->cct_cpi_head == NULL ||
7701 7699              (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7702 7700                  enqueue_tail_vhcache_pathinfo(cct, newcpi);
7703 7701          else {
                           /*
                            * Empty-bodied loop: stop at the first flagged cpi, or
                            * with cpi == NULL when the list has none; prev_cpi
                            * trails one entry behind.
                            */
7704 7702                  for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7705 7703                      !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7706 7704                      prev_cpi = cpi, cpi = cpi->cpi_next)
7707 7705                          ;
7708 7706  
                           /* link newcpi in between prev_cpi and cpi */
7709 7707                  if (prev_cpi == NULL)
7710 7708                          cct->cct_cpi_head = newcpi;
7711 7709                  else
7712 7710                          prev_cpi->cpi_next = newcpi;
7713 7711  
7714 7712                  newcpi->cpi_next = cpi;
7715 7713  
                           /* inserted after the last entry: newcpi is the new tail */
7716 7714                  if (cpi == NULL)
7717 7715                          cct->cct_cpi_tail = newcpi;
7718 7716          }
7719 7717  }
7720 7718  
7721 7719  /*
7722 7720   * Enqueue the vhcache client (cct) at the tail of the list
 * (vhcache_client_head/vhcache_client_tail).  The client hash is not
 * updated here.
7723 7721   */
7724 7722  static void
7725 7723  enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7726 7724      mdi_vhcache_client_t *cct)
7727 7725  {
7728 7726          cct->cct_next = NULL;
                   /* an empty list is detected by a NULL head; the tail is then unused */
7729 7727          if (vhcache->vhcache_client_head == NULL)
7730 7728                  vhcache->vhcache_client_head = cct;
7731 7729          else
7732 7730                  vhcache->vhcache_client_tail->cct_next = cct;
7733 7731          vhcache->vhcache_client_tail = cct;
7734 7732  }
7735 7733  
/*
 * Free an array of nelem strings and then the array itself.
 * A NULL str is a no-op and NULL elements are skipped.  Each string is
 * freed as strlen() + 1 bytes, so it must have been allocated with
 * exactly that size.
 */
7736 7734  static void
7737 7735  free_string_array(char **str, int nelem)
7738 7736  {
7739 7737          int i;
7740 7738  
7741 7739          if (str) {
7742 7740                  for (i = 0; i < nelem; i++) {
7743 7741                          if (str[i])
7744 7742                                  kmem_free(str[i], strlen(str[i]) + 1);
7745 7743                  }
7746 7744                  kmem_free(str, sizeof (char *) * nelem);
7747 7745          }
7748 7746  }
7749 7747  
/*
 * Free a vhcache phci entry together with its cphci_path string.
 * cphci_path is freed as strlen() + 1 bytes and must not be NULL.
 */
7750 7748  static void
7751 7749  free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7752 7750  {
7753 7751          kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7754 7752          kmem_free(cphci, sizeof (*cphci));
7755 7753  }
7756 7754  
/*
 * Free a vhcache pathinfo entry together with its cpi_addr string.
 * cpi_addr is freed as strlen() + 1 bytes and must not be NULL.
 */
7757 7755  static void
7758 7756  free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7759 7757  {
7760 7758          kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7761 7759          kmem_free(cpi, sizeof (*cpi));
7762 7760  }
7763 7761  
/*
 * Free a vhcache client entry together with its cct_name_addr string.
 * cct_name_addr is freed as strlen() + 1 bytes and must not be NULL.
 * The client's pathinfo list is not freed here.
 */
7764 7762  static void
7765 7763  free_vhcache_client(mdi_vhcache_client_t *cct)
7766 7764  {
7767 7765          kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7768 7766          kmem_free(cct, sizeof (*cct));
7769 7767  }
7770 7768  
/*
 * Build the string "<ct_name>@<ct_addr>" in newly allocated memory.
 * When ret_len is non-NULL it receives the allocation size in bytes,
 * which includes the terminating NUL.  Returns the new string.
 */
7771 7769  static char *
7772 7770  vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7773 7771  {
7774 7772          char *name_addr;
7775 7773          int len;
7776 7774  
                   /* + 2: one byte for the '@' separator, one for the terminating NUL */
7777 7775          len = strlen(ct_name) + strlen(ct_addr) + 2;
7778 7776          name_addr = kmem_alloc(len, KM_SLEEP);
7779 7777          (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7780 7778  
7781 7779          if (ret_len)
7782 7780                  *ret_len = len;
7783 7781          return (name_addr);
7784 7782  }
7785 7783  
7786 7784  /*
7787 7785   * Copy the contents of paddrnvl to vhci cache.
7788 7786   * paddrnvl nvlist contains path information for a vhci client.
7789 7787   * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
 *
 * One cache pathinfo is created per nvpair and appended to cct's list:
 * the nvpair name becomes cpi_addr, val[0] selects the entry of
 * cphci_list[] and val[1] becomes cpi_flags.
7790 7788   */
7791 7789  static void
7792 7790  paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7793 7791      mdi_vhcache_client_t *cct)
7794 7792  {
7795 7793          nvpair_t *nvp = NULL;
7796 7794          mdi_vhcache_pathinfo_t *cpi;
7797 7795          uint_t nelem;
7798 7796          uint32_t *val;
7799 7797  
7800 7798          while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7801 7799                  ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7802 7800                  cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7803 7801                  cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7804 7802                  (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7805 7803                  ASSERT(nelem == 2);
                           /*
                            * NOTE(review): the pair type and nelem are only checked
                            * by ASSERT, and val[0] is not range checked against
                            * the length of cphci_list[] here - presumably the
                            * caller validates the nvlist first; confirm.
                            */
7806 7804                  cpi->cpi_cphci = cphci_list[val[0]];
7807 7805                  cpi->cpi_flags = val[1];
7808 7806                  enqueue_tail_vhcache_pathinfo(cct, cpi);
7809 7807          }
7810 7808  }
7811 7809  
7812 7810  /*
7813 7811   * Copy the contents of caddrmapnvl to vhci cache.
7814 7812   * caddrmapnvl nvlist contains vhci client address to phci client address
7815 7813   * mappings. See the comment in mainnvl_to_vhcache() for the format of
7816 7814   * this nvlist.
 *
 * One cache client is created per nvpair: the nvpair name becomes
 * cct_name_addr, the nested nvlist supplies its paths, and the client is
 * added to both the client list and the client hash.
7817 7815   */
7818 7816  static void
7819 7817  caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7820 7818      mdi_vhcache_phci_t *cphci_list[])
7821 7819  {
7822 7820          nvpair_t *nvp = NULL;
7823 7821          nvlist_t *paddrnvl;
7824 7822          mdi_vhcache_client_t *cct;
7825 7823  
7826 7824          while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7827 7825                  ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7828 7826                  cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7829 7827                  cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7830 7828                  (void) nvpair_value_nvlist(nvp, &paddrnvl);
7831 7829                  paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7832 7830                  /* the client must contain at least one path */
7833 7831                  ASSERT(cct->cct_cpi_head != NULL);
7834 7832  
7835 7833                  enqueue_vhcache_client(vhcache, cct);
                           /* the hash key is the client's own cct_name_addr string */
7836 7834                  (void) mod_hash_insert(vhcache->vhcache_client_hash,
7837 7835                      (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7838 7836          }
7839 7837  }
7840 7838  
7841 7839  /*
7842 7840   * Copy the contents of the main nvlist to vhci cache.
7843 7841   *
7844 7842   * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7845 7843   * The nvlist contains the mappings between the vhci client addresses and
7846 7844   * their corresponding phci client addresses.
7847 7845   *
7848 7846   * The structure of the nvlist is as follows:
7849 7847   *
7850 7848   * Main nvlist:
7851 7849   *      NAME            TYPE            DATA
7852 7850   *      version         int32           version number
7853 7851   *      phcis           string array    array of phci paths
7854 7852   *      clientaddrmap   nvlist_t        c2paddrs_nvl (see below)
7855 7853   *
7856 7854   * structure of c2paddrs_nvl:
7857 7855   *      NAME            TYPE            DATA
7858 7856   *      caddr1          nvlist_t        paddrs_nvl1
7859 7857   *      caddr2          nvlist_t        paddrs_nvl2
7860 7858   *      ...
7861 7859   * where caddr1, caddr2, ... are vhci client name and addresses in the
7862 7860   * form of "<clientname>@<clientaddress>".
7863 7861   * (for example: "ssd@2000002037cd9f72");
7864 7862   * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7865 7863   *
7866 7864   * structure of paddrs_nvl:
7867 7865   *      NAME            TYPE            DATA
7868 7866   *      pi_addr1        uint32_array    (phci-id, cpi_flags)
7869 7867   *      pi_addr2        uint32_array    (phci-id, cpi_flags)
7870 7868   *      ...
7871 7869   * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7872 7870   * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7873 7871   * phci-ids are integers that identify pHCIs to which the
7874 7872   * the bus specific address belongs to. These integers are used as an index
7875 7873   * into to the phcis string array in the main nvlist to get the pHCI path.
7876 7874   */
7877 7875  static int
7878 7876  mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7879 7877  {
7880 7878          char **phcis, **phci_namep;
7881 7879          uint_t nphcis;
7882 7880          mdi_vhcache_phci_t *cphci, **cphci_list;
7883 7881          nvlist_t *caddrmapnvl;
7884 7882          int32_t ver;
7885 7883          int i;
7886 7884          size_t cphci_list_size;
7887 7885  
7888 7886          ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7889 7887  
7890 7888          if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7891 7889              ver != MDI_VHCI_CACHE_VERSION)
7892 7890                  return (MDI_FAILURE);
7893 7891  
7894 7892          if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7895 7893              &nphcis) != 0)
7896 7894                  return (MDI_SUCCESS);
7897 7895  
7898 7896          ASSERT(nphcis > 0);
7899 7897  
7900 7898          cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7901 7899          cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7902 7900          for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7903 7901                  cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7904 7902                  cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7905 7903                  enqueue_vhcache_phci(vhcache, cphci);
7906 7904                  cphci_list[i] = cphci;
7907 7905          }
7908 7906  
7909 7907          ASSERT(vhcache->vhcache_phci_head != NULL);
7910 7908  
7911 7909          if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7912 7910                  caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7913 7911  
7914 7912          kmem_free(cphci_list, cphci_list_size);
7915 7913          return (MDI_SUCCESS);
7916 7914  }
7917 7915  
7918 7916  /*
7919 7917   * Build paddrnvl for the specified client using the information in the
7920 7918   * vhci cache and add it to the caddrmapnnvl.
7921 7919   * Returns 0 on success, errno on failure.
7922 7920   */
7923 7921  static int
7924 7922  vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7925 7923      nvlist_t *caddrmapnvl)
7926 7924  {
7927 7925          mdi_vhcache_pathinfo_t *cpi;
7928 7926          nvlist_t *nvl;
7929 7927          int err;
7930 7928          uint32_t val[2];
7931 7929  
7932 7930          ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7933 7931  
7934 7932          if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7935 7933                  return (err);
7936 7934  
7937 7935          for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7938 7936                  val[0] = cpi->cpi_cphci->cphci_id;
7939 7937                  val[1] = cpi->cpi_flags;
7940 7938                  if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7941 7939                      != 0)
7942 7940                          goto out;
7943 7941          }
7944 7942  
7945 7943          err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7946 7944  out:
7947 7945          nvlist_free(nvl);
7948 7946          return (err);
7949 7947  }
7950 7948  
7951 7949  /*
7952 7950   * Build caddrmapnvl using the information in the vhci cache
7953 7951   * and add it to the mainnvl.
7954 7952   * Returns 0 on success, errno on failure.
7955 7953   */
7956 7954  static int
7957 7955  vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7958 7956  {
7959 7957          mdi_vhcache_client_t *cct;
7960 7958          nvlist_t *nvl;
7961 7959          int err;
7962 7960  
7963 7961          ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7964 7962  
7965 7963          if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7966 7964                  return (err);
7967 7965  
7968 7966          for (cct = vhcache->vhcache_client_head; cct != NULL;
7969 7967              cct = cct->cct_next) {
7970 7968                  if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7971 7969                          goto out;
7972 7970          }
7973 7971  
7974 7972          err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7975 7973  out:
7976 7974          nvlist_free(nvl);
7977 7975          return (err);
7978 7976  }
7979 7977  
7980 7978  /*
7981 7979   * Build nvlist using the information in the vhci cache.
7982 7980   * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7983 7981   * Returns nvl on success, NULL on failure.
7984 7982   */
7985 7983  static nvlist_t *
7986 7984  vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7987 7985  {
7988 7986          mdi_vhcache_phci_t *cphci;
7989 7987          uint_t phci_count;
7990 7988          char **phcis;
7991 7989          nvlist_t *nvl;
7992 7990          int err, i;
7993 7991  
7994 7992          if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7995 7993                  nvl = NULL;
7996 7994                  goto out;
7997 7995          }
7998 7996  
7999 7997          if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8000 7998              MDI_VHCI_CACHE_VERSION)) != 0)
8001 7999                  goto out;
8002 8000  
8003 8001          rw_enter(&vhcache->vhcache_lock, RW_READER);
8004 8002          if (vhcache->vhcache_phci_head == NULL) {
8005 8003                  rw_exit(&vhcache->vhcache_lock);
8006 8004                  return (nvl);
8007 8005          }
8008 8006  
8009 8007          phci_count = 0;
8010 8008          for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8011 8009              cphci = cphci->cphci_next)
8012 8010                  cphci->cphci_id = phci_count++;
8013 8011  
8014 8012          /* build phci pathname list */
8015 8013          phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8016 8014          for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8017 8015              cphci = cphci->cphci_next, i++)
8018 8016                  phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8019 8017  
8020 8018          err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8021 8019              phci_count);
8022 8020          free_string_array(phcis, phci_count);
8023 8021  
8024 8022          if (err == 0 &&
8025 8023              (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8026 8024                  rw_exit(&vhcache->vhcache_lock);
8027 8025                  return (nvl);
8028 8026          }
8029 8027  
8030 8028          rw_exit(&vhcache->vhcache_lock);
8031 8029  out:
8032 8030          nvlist_free(nvl);
8033 8031          return (NULL);
8034 8032  }
8035 8033  
8036 8034  /*
8037 8035   * Lookup vhcache phci structure for the specified phci path.
8038 8036   */
8039 8037  static mdi_vhcache_phci_t *
8040 8038  lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8041 8039  {
8042 8040          mdi_vhcache_phci_t *cphci;
8043 8041  
8044 8042          ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8045 8043  
8046 8044          for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8047 8045              cphci = cphci->cphci_next) {
8048 8046                  if (strcmp(cphci->cphci_path, phci_path) == 0)
8049 8047                          return (cphci);
8050 8048          }
8051 8049  
8052 8050          return (NULL);
8053 8051  }
8054 8052  
8055 8053  /*
8056 8054   * Lookup vhcache phci structure for the specified phci.
8057 8055   */
8058 8056  static mdi_vhcache_phci_t *
8059 8057  lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8060 8058  {
8061 8059          mdi_vhcache_phci_t *cphci;
8062 8060  
8063 8061          ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8064 8062  
8065 8063          for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8066 8064              cphci = cphci->cphci_next) {
8067 8065                  if (cphci->cphci_phci == ph)
8068 8066                          return (cphci);
8069 8067          }
8070 8068  
8071 8069          return (NULL);
8072 8070  }
8073 8071  
8074 8072  /*
8075 8073   * Add the specified phci to the vhci cache if not already present.
8076 8074   */
8077 8075  static void
8078 8076  vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8079 8077  {
8080 8078          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8081 8079          mdi_vhcache_phci_t *cphci;
8082 8080          char *pathname;
8083 8081          int cache_updated;
8084 8082  
8085 8083          rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8086 8084  
8087 8085          pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8088 8086          (void) ddi_pathname(ph->ph_dip, pathname);
8089 8087          if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8090 8088              != NULL) {
8091 8089                  cphci->cphci_phci = ph;
8092 8090                  cache_updated = 0;
8093 8091          } else {
8094 8092                  cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8095 8093                  cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8096 8094                  cphci->cphci_phci = ph;
8097 8095                  enqueue_vhcache_phci(vhcache, cphci);
8098 8096                  cache_updated = 1;
8099 8097          }
8100 8098  
8101 8099          rw_exit(&vhcache->vhcache_lock);
8102 8100  
8103 8101          /*
8104 8102           * Since a new phci has been added, reset
8105 8103           * vhc_path_discovery_cutoff_time to allow for discovery of paths
8106 8104           * during next vhcache_discover_paths().
8107 8105           */
8108 8106          mutex_enter(&vhc->vhc_lock);
8109 8107          vhc->vhc_path_discovery_cutoff_time = 0;
8110 8108          mutex_exit(&vhc->vhc_lock);
8111 8109  
8112 8110          kmem_free(pathname, MAXPATHLEN);
8113 8111          if (cache_updated)
8114 8112                  vhcache_dirty(vhc);
8115 8113  }
8116 8114  
8117 8115  /*
8118 8116   * Remove the reference to the specified phci from the vhci cache.
8119 8117   */
8120 8118  static void
8121 8119  vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8122 8120  {
8123 8121          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8124 8122          mdi_vhcache_phci_t *cphci;
8125 8123  
8126 8124          rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8127 8125          if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8128 8126                  /* do not remove the actual mdi_vhcache_phci structure */
8129 8127                  cphci->cphci_phci = NULL;
8130 8128          }
8131 8129          rw_exit(&vhcache->vhcache_lock);
8132 8130  }
8133 8131  
8134 8132  static void
8135 8133  init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8136 8134      mdi_vhcache_lookup_token_t *src)
8137 8135  {
8138 8136          if (src == NULL) {
8139 8137                  dst->lt_cct = NULL;
8140 8138                  dst->lt_cct_lookup_time = 0;
8141 8139          } else {
8142 8140                  dst->lt_cct = src->lt_cct;
8143 8141                  dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8144 8142          }
8145 8143  }
8146 8144  
8147 8145  /*
8148 8146   * Look up vhcache client for the specified client.
8149 8147   */
8150 8148  static mdi_vhcache_client_t *
8151 8149  lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8152 8150      mdi_vhcache_lookup_token_t *token)
8153 8151  {
8154 8152          mod_hash_val_t hv;
8155 8153          char *name_addr;
8156 8154          int len;
8157 8155  
8158 8156          ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8159 8157  
8160 8158          /*
8161 8159           * If no vhcache clean occurred since the last lookup, we can
8162 8160           * simply return the cct from the last lookup operation.
8163 8161           * It works because ccts are never freed except during the vhcache
8164 8162           * cleanup operation.
8165 8163           */
8166 8164          if (token != NULL &&
8167 8165              vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8168 8166                  return (token->lt_cct);
8169 8167  
8170 8168          name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8171 8169          if (mod_hash_find(vhcache->vhcache_client_hash,
8172 8170              (mod_hash_key_t)name_addr, &hv) == 0) {
8173 8171                  if (token) {
8174 8172                          token->lt_cct = (mdi_vhcache_client_t *)hv;
8175 8173                          token->lt_cct_lookup_time = ddi_get_lbolt64();
8176 8174                  }
8177 8175          } else {
8178 8176                  if (token) {
8179 8177                          token->lt_cct = NULL;
8180 8178                          token->lt_cct_lookup_time = 0;
8181 8179                  }
8182 8180                  hv = NULL;
8183 8181          }
8184 8182          kmem_free(name_addr, len);
8185 8183          return ((mdi_vhcache_client_t *)hv);
8186 8184  }
8187 8185  
8188 8186  /*
8189 8187   * Add the specified path to the vhci cache if not already present.
8190 8188   * Also add the vhcache client for the client corresponding to this path
8191 8189   * if it doesn't already exist.
8192 8190   */
8193 8191  static void
8194 8192  vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8195 8193  {
8196 8194          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8197 8195          mdi_vhcache_client_t *cct;
8198 8196          mdi_vhcache_pathinfo_t *cpi;
8199 8197          mdi_phci_t *ph = pip->pi_phci;
8200 8198          mdi_client_t *ct = pip->pi_client;
8201 8199          int cache_updated = 0;
8202 8200  
8203 8201          rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8204 8202  
8205 8203          /* if vhcache client for this pip doesn't already exist, add it */
8206 8204          if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8207 8205              NULL)) == NULL) {
8208 8206                  cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8209 8207                  cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8210 8208                      ct->ct_guid, NULL);
8211 8209                  enqueue_vhcache_client(vhcache, cct);
8212 8210                  (void) mod_hash_insert(vhcache->vhcache_client_hash,
8213 8211                      (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8214 8212                  cache_updated = 1;
8215 8213          }
8216 8214  
8217 8215          for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8218 8216                  if (cpi->cpi_cphci->cphci_phci == ph &&
8219 8217                      strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8220 8218                          cpi->cpi_pip = pip;
8221 8219                          if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8222 8220                                  cpi->cpi_flags &=
8223 8221                                      ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8224 8222                                  sort_vhcache_paths(cct);
8225 8223                                  cache_updated = 1;
8226 8224                          }
8227 8225                          break;
8228 8226                  }
8229 8227          }
8230 8228  
8231 8229          if (cpi == NULL) {
8232 8230                  cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8233 8231                  cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8234 8232                  cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8235 8233                  ASSERT(cpi->cpi_cphci != NULL);
8236 8234                  cpi->cpi_pip = pip;
8237 8235                  enqueue_vhcache_pathinfo(cct, cpi);
8238 8236                  cache_updated = 1;
8239 8237          }
8240 8238  
8241 8239          rw_exit(&vhcache->vhcache_lock);
8242 8240  
8243 8241          if (cache_updated)
8244 8242                  vhcache_dirty(vhc);
8245 8243  }
8246 8244  
8247 8245  /*
8248 8246   * Remove the reference to the specified path from the vhci cache.
8249 8247   */
8250 8248  static void
8251 8249  vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8252 8250  {
8253 8251          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8254 8252          mdi_client_t *ct = pip->pi_client;
8255 8253          mdi_vhcache_client_t *cct;
8256 8254          mdi_vhcache_pathinfo_t *cpi;
8257 8255  
8258 8256          rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8259 8257          if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8260 8258              NULL)) != NULL) {
8261 8259                  for (cpi = cct->cct_cpi_head; cpi != NULL;
8262 8260                      cpi = cpi->cpi_next) {
8263 8261                          if (cpi->cpi_pip == pip) {
8264 8262                                  cpi->cpi_pip = NULL;
8265 8263                                  break;
8266 8264                          }
8267 8265                  }
8268 8266          }
8269 8267          rw_exit(&vhcache->vhcache_lock);
8270 8268  }
8271 8269  
8272 8270  /*
8273 8271   * Flush the vhci cache to disk.
8274 8272   * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8275 8273   */
8276 8274  static int
8277 8275  flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8278 8276  {
8279 8277          nvlist_t *nvl;
8280 8278          int err;
8281 8279          int rv;
8282 8280  
8283 8281          /*
8284 8282           * It is possible that the system may shutdown before
8285 8283           * i_ddi_io_initialized (during stmsboot for example). To allow for
8286 8284           * flushing the cache in this case do not check for
8287 8285           * i_ddi_io_initialized when force flag is set.
8288 8286           */
8289 8287          if (force_flag == 0 && !i_ddi_io_initialized())
8290 8288                  return (MDI_FAILURE);
8291 8289  
8292 8290          if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8293 8291                  err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8294 8292                  nvlist_free(nvl);
8295 8293          } else
8296 8294                  err = EFAULT;
8297 8295  
8298 8296          rv = MDI_SUCCESS;
8299 8297          mutex_enter(&vhc->vhc_lock);
8300 8298          if (err != 0) {
8301 8299                  if (err == EROFS) {
8302 8300                          vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8303 8301                          vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8304 8302                              MDI_VHC_VHCACHE_DIRTY);
8305 8303                  } else {
8306 8304                          if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8307 8305                                  cmn_err(CE_CONT, "%s: update failed\n",
8308 8306                                      vhc->vhc_vhcache_filename);
8309 8307                                  vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8310 8308                          }
8311 8309                          rv = MDI_FAILURE;
8312 8310                  }
8313 8311          } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8314 8312                  cmn_err(CE_CONT,
8315 8313                      "%s: update now ok\n", vhc->vhc_vhcache_filename);
8316 8314                  vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8317 8315          }
8318 8316          mutex_exit(&vhc->vhc_lock);
8319 8317  
8320 8318          return (rv);
8321 8319  }
8322 8320  
8323 8321  /*
8324 8322   * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8325 8323   * Exits itself if left idle for the idle timeout period.
8326 8324   */
8327 8325  static void
8328 8326  vhcache_flush_thread(void *arg)
8329 8327  {
8330 8328          mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8331 8329          clock_t idle_time, quit_at_ticks;
8332 8330          callb_cpr_t cprinfo;
8333 8331  
8334 8332          /* number of seconds to sleep idle before exiting */
8335 8333          idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
8336 8334  
8337 8335          CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8338 8336              "mdi_vhcache_flush");
8339 8337          mutex_enter(&vhc->vhc_lock);
8340 8338          for (; ; ) {
8341 8339                  while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8342 8340                      (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
8343 8341                          if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
8344 8342                                  CALLB_CPR_SAFE_BEGIN(&cprinfo);
8345 8343                                  (void) cv_timedwait(&vhc->vhc_cv,
8346 8344                                      &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
8347 8345                                  CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8348 8346                          } else {
8349 8347                                  vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
8350 8348                                  mutex_exit(&vhc->vhc_lock);
8351 8349  
8352 8350                                  if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
8353 8351                                          vhcache_dirty(vhc);
8354 8352  
8355 8353                                  mutex_enter(&vhc->vhc_lock);
8356 8354                          }
8357 8355                  }
8358 8356  
8359 8357                  quit_at_ticks = ddi_get_lbolt() + idle_time;
8360 8358  
8361 8359                  while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8362 8360                      !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
8363 8361                      ddi_get_lbolt() < quit_at_ticks) {
8364 8362                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
8365 8363                          (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8366 8364                              quit_at_ticks);
8367 8365                          CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8368 8366                  }
8369 8367  
8370 8368                  if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8371 8369                      !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
8372 8370                          goto out;
8373 8371          }
8374 8372  
8375 8373  out:
8376 8374          vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
8377 8375          /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8378 8376          CALLB_CPR_EXIT(&cprinfo);
8379 8377  }
8380 8378  
8381 8379  /*
8382 8380   * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8383 8381   */
8384 8382  static void
8385 8383  vhcache_dirty(mdi_vhci_config_t *vhc)
8386 8384  {
8387 8385          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8388 8386          int create_thread;
8389 8387  
8390 8388          rw_enter(&vhcache->vhcache_lock, RW_READER);
8391 8389          /* do not flush cache until the cache is fully built */
8392 8390          if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8393 8391                  rw_exit(&vhcache->vhcache_lock);
8394 8392                  return;
8395 8393          }
8396 8394          rw_exit(&vhcache->vhcache_lock);
8397 8395  
8398 8396          mutex_enter(&vhc->vhc_lock);
8399 8397          if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8400 8398                  mutex_exit(&vhc->vhc_lock);
8401 8399                  return;
8402 8400          }
8403 8401  
8404 8402          vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8405 8403          vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8406 8404              mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8407 8405          if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8408 8406                  cv_broadcast(&vhc->vhc_cv);
8409 8407                  create_thread = 0;
8410 8408          } else {
8411 8409                  vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8412 8410                  create_thread = 1;
8413 8411          }
8414 8412          mutex_exit(&vhc->vhc_lock);
8415 8413  
8416 8414          if (create_thread)
8417 8415                  (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8418 8416                      0, &p0, TS_RUN, minclsyspri);
8419 8417  }
8420 8418  
8421 8419  /*
8422 8420   * phci bus config structure - one for for each phci bus config operation that
8423 8421   * we initiate on behalf of a vhci.
8424 8422   */
8425 8423  typedef struct mdi_phci_bus_config_s {
8426 8424          char *phbc_phci_path;
8427 8425          struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
8428 8426          struct mdi_phci_bus_config_s *phbc_next;
8429 8427  } mdi_phci_bus_config_t;
8430 8428  
8431 8429  /* vhci bus config structure - one for each vhci bus config operation */
8432 8430  typedef struct mdi_vhci_bus_config_s {
8433 8431          ddi_bus_config_op_t vhbc_op;    /* bus config op */
8434 8432          major_t vhbc_op_major;          /* bus config op major */
8435 8433          uint_t vhbc_op_flags;           /* bus config op flags */
8436 8434          kmutex_t vhbc_lock;
8437 8435          kcondvar_t vhbc_cv;
8438 8436          int vhbc_thr_count;
8439 8437  } mdi_vhci_bus_config_t;
8440 8438  
8441 8439  /*
8442 8440   * bus config the specified phci
8443 8441   */
8444 8442  static void
8445 8443  bus_config_phci(void *arg)
8446 8444  {
8447 8445          mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8448 8446          mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8449 8447          dev_info_t *ph_dip;
8450 8448  
8451 8449          /*
8452 8450           * first configure all path components upto phci and then configure
8453 8451           * the phci children.
8454 8452           */
8455 8453          if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8456 8454              != NULL) {
8457 8455                  if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8458 8456                      vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8459 8457                          (void) ndi_devi_config_driver(ph_dip,
8460 8458                              vhbc->vhbc_op_flags,
8461 8459                              vhbc->vhbc_op_major);
8462 8460                  } else
8463 8461                          (void) ndi_devi_config(ph_dip,
8464 8462                              vhbc->vhbc_op_flags);
8465 8463  
8466 8464                  /* release the hold that e_ddi_hold_devi_by_path() placed */
8467 8465                  ndi_rele_devi(ph_dip);
8468 8466          }
8469 8467  
8470 8468          kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8471 8469          kmem_free(phbc, sizeof (*phbc));
8472 8470  
8473 8471          mutex_enter(&vhbc->vhbc_lock);
8474 8472          vhbc->vhbc_thr_count--;
8475 8473          if (vhbc->vhbc_thr_count == 0)
8476 8474                  cv_broadcast(&vhbc->vhbc_cv);
8477 8475          mutex_exit(&vhbc->vhbc_lock);
8478 8476  }
8479 8477  
8480 8478  /*
8481 8479   * Bus config all phcis associated with the vhci in parallel.
8482 8480   * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8483 8481   */
8484 8482  static void
8485 8483  bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8486 8484      ddi_bus_config_op_t op, major_t maj)
8487 8485  {
8488 8486          mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8489 8487          mdi_vhci_bus_config_t *vhbc;
8490 8488          mdi_vhcache_phci_t *cphci;
8491 8489  
8492 8490          rw_enter(&vhcache->vhcache_lock, RW_READER);
8493 8491          if (vhcache->vhcache_phci_head == NULL) {
8494 8492                  rw_exit(&vhcache->vhcache_lock);
8495 8493                  return;
8496 8494          }
8497 8495  
8498 8496          vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8499 8497  
8500 8498          for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8501 8499              cphci = cphci->cphci_next) {
8502 8500                  /* skip phcis that haven't attached before root is available */
8503 8501                  if (!modrootloaded && (cphci->cphci_phci == NULL))
8504 8502                          continue;
8505 8503                  phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8506 8504                  phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8507 8505                      KM_SLEEP);
8508 8506                  phbc->phbc_vhbusconfig = vhbc;
8509 8507                  phbc->phbc_next = phbc_head;
8510 8508                  phbc_head = phbc;
8511 8509                  vhbc->vhbc_thr_count++;
8512 8510          }
8513 8511          rw_exit(&vhcache->vhcache_lock);
8514 8512  
8515 8513          vhbc->vhbc_op = op;
8516 8514          vhbc->vhbc_op_major = maj;
8517 8515          vhbc->vhbc_op_flags = NDI_NO_EVENT |
8518 8516              (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8519 8517          mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8520 8518          cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8521 8519  
8522 8520          /* now create threads to initiate bus config on all phcis in parallel */
8523 8521          for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8524 8522                  phbc_next = phbc->phbc_next;
8525 8523                  if (mdi_mtc_off)
8526 8524                          bus_config_phci((void *)phbc);
8527 8525                  else
8528 8526                          (void) thread_create(NULL, 0, bus_config_phci, phbc,
8529 8527                              0, &p0, TS_RUN, minclsyspri);
8530 8528          }
8531 8529  
8532 8530          mutex_enter(&vhbc->vhbc_lock);
8533 8531          /* wait until all threads exit */
8534 8532          while (vhbc->vhbc_thr_count > 0)
8535 8533                  cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8536 8534          mutex_exit(&vhbc->vhbc_lock);
8537 8535  
8538 8536          mutex_destroy(&vhbc->vhbc_lock);
8539 8537          cv_destroy(&vhbc->vhbc_cv);
8540 8538          kmem_free(vhbc, sizeof (*vhbc));
8541 8539  }
8542 8540  
8543 8541  /*
8544 8542   * Single threaded version of bus_config_all_phcis()
8545 8543   */
8546 8544  static void
8547 8545  st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8548 8546      ddi_bus_config_op_t op, major_t maj)
8549 8547  {
8550 8548          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8551 8549  
8552 8550          single_threaded_vhconfig_enter(vhc);
8553 8551          bus_config_all_phcis(vhcache, flags, op, maj);
8554 8552          single_threaded_vhconfig_exit(vhc);
8555 8553  }
8556 8554  
8557 8555  /*
8558 8556   * Perform BUS_CONFIG_ONE on the specified child of the phci.
8559 8557   * The path includes the child component in addition to the phci path.
8560 8558   */
8561 8559  static int
8562 8560  bus_config_one_phci_child(char *path)
8563 8561  {
8564 8562          dev_info_t *ph_dip, *child;
8565 8563          char *devnm;
8566 8564          int rv = MDI_FAILURE;
8567 8565  
8568 8566          /* extract the child component of the phci */
8569 8567          devnm = strrchr(path, '/');
8570 8568          *devnm++ = '\0';
8571 8569  
8572 8570          /*
8573 8571           * first configure all path components upto phci and then
8574 8572           * configure the phci child.
8575 8573           */
8576 8574          if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8577 8575                  if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8578 8576                      NDI_SUCCESS) {
8579 8577                          /*
8580 8578                           * release the hold that ndi_devi_config_one() placed
8581 8579                           */
8582 8580                          ndi_rele_devi(child);
8583 8581                          rv = MDI_SUCCESS;
8584 8582                  }
8585 8583  
8586 8584                  /* release the hold that e_ddi_hold_devi_by_path() placed */
8587 8585                  ndi_rele_devi(ph_dip);
8588 8586          }
8589 8587  
8590 8588          devnm--;
8591 8589          *devnm = '/';
8592 8590          return (rv);
8593 8591  }
8594 8592  
8595 8593  /*
8596 8594   * Build a list of phci client paths for the specified vhci client.
8597 8595   * The list includes only those phci client paths which aren't configured yet.
8598 8596   */
8599 8597  static mdi_phys_path_t *
8600 8598  build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8601 8599  {
8602 8600          mdi_vhcache_pathinfo_t *cpi;
8603 8601          mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8604 8602          int config_path, len;
8605 8603  
8606 8604          for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8607 8605                  /*
8608 8606                   * include only those paths that aren't configured.
8609 8607                   */
8610 8608                  config_path = 0;
8611 8609                  if (cpi->cpi_pip == NULL)
8612 8610                          config_path = 1;
8613 8611                  else {
8614 8612                          MDI_PI_LOCK(cpi->cpi_pip);
8615 8613                          if (MDI_PI_IS_INIT(cpi->cpi_pip))
8616 8614                                  config_path = 1;
8617 8615                          MDI_PI_UNLOCK(cpi->cpi_pip);
8618 8616                  }
8619 8617  
8620 8618                  if (config_path) {
8621 8619                          pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8622 8620                          len = strlen(cpi->cpi_cphci->cphci_path) +
8623 8621                              strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8624 8622                          pp->phys_path = kmem_alloc(len, KM_SLEEP);
8625 8623                          (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8626 8624                              cpi->cpi_cphci->cphci_path, ct_name,
8627 8625                              cpi->cpi_addr);
8628 8626                          pp->phys_path_next = NULL;
8629 8627  
8630 8628                          if (pp_head == NULL)
8631 8629                                  pp_head = pp;
8632 8630                          else
8633 8631                                  pp_tail->phys_path_next = pp;
8634 8632                          pp_tail = pp;
8635 8633                  }
8636 8634          }
8637 8635  
8638 8636          return (pp_head);
8639 8637  }
8640 8638  
8641 8639  /*
8642 8640   * Free the memory allocated for phci client path list.
8643 8641   */
8644 8642  static void
8645 8643  free_phclient_path_list(mdi_phys_path_t *pp_head)
8646 8644  {
8647 8645          mdi_phys_path_t *pp, *pp_next;
8648 8646  
8649 8647          for (pp = pp_head; pp != NULL; pp = pp_next) {
8650 8648                  pp_next = pp->phys_path_next;
8651 8649                  kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8652 8650                  kmem_free(pp, sizeof (*pp));
8653 8651          }
8654 8652  }
8655 8653  
8656 8654  /*
8657 8655   * Allocated async client structure and initialize with the specified values.
8658 8656   */
8659 8657  static mdi_async_client_config_t *
8660 8658  alloc_async_client_config(char *ct_name, char *ct_addr,
8661 8659      mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8662 8660  {
8663 8661          mdi_async_client_config_t *acc;
8664 8662  
8665 8663          acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8666 8664          acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8667 8665          acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8668 8666          acc->acc_phclient_path_list_head = pp_head;
8669 8667          init_vhcache_lookup_token(&acc->acc_token, tok);
8670 8668          acc->acc_next = NULL;
8671 8669          return (acc);
8672 8670  }
8673 8671  
8674 8672  /*
8675 8673   * Free the memory allocated for the async client structure and their members.
8676 8674   */
8677 8675  static void
8678 8676  free_async_client_config(mdi_async_client_config_t *acc)
8679 8677  {
8680 8678          if (acc->acc_phclient_path_list_head)
8681 8679                  free_phclient_path_list(acc->acc_phclient_path_list_head);
8682 8680          kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8683 8681          kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8684 8682          kmem_free(acc, sizeof (*acc));
8685 8683  }
8686 8684  
8687 8685  /*
8688 8686   * Sort vhcache pathinfos (cpis) of the specified client.
8689 8687   * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8690 8688   * flag set come at the beginning of the list. All cpis which have this
8691 8689   * flag set come at the end of the list.
8692 8690   */
8693 8691  static void
8694 8692  sort_vhcache_paths(mdi_vhcache_client_t *cct)
8695 8693  {
8696 8694          mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8697 8695  
8698 8696          cpi_head = cct->cct_cpi_head;
8699 8697          cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8700 8698          for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8701 8699                  cpi_next = cpi->cpi_next;
8702 8700                  enqueue_vhcache_pathinfo(cct, cpi);
8703 8701          }
8704 8702  }
8705 8703  
8706 8704  /*
8707 8705   * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8708 8706   * every vhcache pathinfo of the specified client. If not adjust the flag
8709 8707   * setting appropriately.
8710 8708   *
8711 8709   * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8712 8710   * on-disk vhci cache. So every time this flag is updated the cache must be
8713 8711   * flushed.
8714 8712   */
8715 8713  static void
8716 8714  adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8717 8715      mdi_vhcache_lookup_token_t *tok)
8718 8716  {
8719 8717          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8720 8718          mdi_vhcache_client_t *cct;
8721 8719          mdi_vhcache_pathinfo_t *cpi;
8722 8720  
8723 8721          rw_enter(&vhcache->vhcache_lock, RW_READER);
8724 8722          if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8725 8723              == NULL) {
8726 8724                  rw_exit(&vhcache->vhcache_lock);
8727 8725                  return;
8728 8726          }
8729 8727  
8730 8728          /*
8731 8729           * to avoid unnecessary on-disk cache updates, first check if an
8732 8730           * update is really needed. If no update is needed simply return.
8733 8731           */
8734 8732          for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8735 8733                  if ((cpi->cpi_pip != NULL &&
8736 8734                      (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8737 8735                      (cpi->cpi_pip == NULL &&
8738 8736                      !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8739 8737                          break;
8740 8738                  }
8741 8739          }
8742 8740          if (cpi == NULL) {
8743 8741                  rw_exit(&vhcache->vhcache_lock);
8744 8742                  return;
8745 8743          }
8746 8744  
8747 8745          if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8748 8746                  rw_exit(&vhcache->vhcache_lock);
8749 8747                  rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8750 8748                  if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8751 8749                      tok)) == NULL) {
8752 8750                          rw_exit(&vhcache->vhcache_lock);
8753 8751                          return;
8754 8752                  }
8755 8753          }
8756 8754  
8757 8755          for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8758 8756                  if (cpi->cpi_pip != NULL)
8759 8757                          cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8760 8758                  else
8761 8759                          cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8762 8760          }
8763 8761          sort_vhcache_paths(cct);
8764 8762  
8765 8763          rw_exit(&vhcache->vhcache_lock);
8766 8764          vhcache_dirty(vhc);
8767 8765  }
8768 8766  
8769 8767  /*
8770 8768   * Configure all specified paths of the client.
8771 8769   */
8772 8770  static void
8773 8771  config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8774 8772      mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8775 8773  {
8776 8774          mdi_phys_path_t *pp;
8777 8775  
8778 8776          for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8779 8777                  (void) bus_config_one_phci_child(pp->phys_path);
8780 8778          adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8781 8779  }
8782 8780  
8783 8781  /*
8784 8782   * Dequeue elements from vhci async client config list and bus configure
8785 8783   * their corresponding phci clients.
8786 8784   */
8787 8785  static void
8788 8786  config_client_paths_thread(void *arg)
8789 8787  {
8790 8788          mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8791 8789          mdi_async_client_config_t *acc;
8792 8790          clock_t quit_at_ticks;
8793 8791          clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8794 8792          callb_cpr_t cprinfo;
8795 8793  
8796 8794          CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8797 8795              "mdi_config_client_paths");
8798 8796  
8799 8797          for (; ; ) {
8800 8798                  quit_at_ticks = ddi_get_lbolt() + idle_time;
8801 8799  
8802 8800                  mutex_enter(&vhc->vhc_lock);
8803 8801                  while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8804 8802                      vhc->vhc_acc_list_head == NULL &&
8805 8803                      ddi_get_lbolt() < quit_at_ticks) {
8806 8804                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
8807 8805                          (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8808 8806                              quit_at_ticks);
8809 8807                          CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8810 8808                  }
8811 8809  
8812 8810                  if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8813 8811                      vhc->vhc_acc_list_head == NULL)
8814 8812                          goto out;
8815 8813  
8816 8814                  acc = vhc->vhc_acc_list_head;
8817 8815                  vhc->vhc_acc_list_head = acc->acc_next;
8818 8816                  if (vhc->vhc_acc_list_head == NULL)
8819 8817                          vhc->vhc_acc_list_tail = NULL;
8820 8818                  vhc->vhc_acc_count--;
8821 8819                  mutex_exit(&vhc->vhc_lock);
8822 8820  
8823 8821                  config_client_paths_sync(vhc, acc->acc_ct_name,
8824 8822                      acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8825 8823                      &acc->acc_token);
8826 8824  
8827 8825                  free_async_client_config(acc);
8828 8826          }
8829 8827  
8830 8828  out:
8831 8829          vhc->vhc_acc_thrcount--;
8832 8830          /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8833 8831          CALLB_CPR_EXIT(&cprinfo);
8834 8832  }
8835 8833  
8836 8834  /*
8837 8835   * Arrange for all the phci client paths (pp_head) for the specified client
8838 8836   * to be bus configured asynchronously by a thread.
8839 8837   */
8840 8838  static void
8841 8839  config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8842 8840      mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8843 8841  {
8844 8842          mdi_async_client_config_t *acc, *newacc;
8845 8843          int create_thread;
8846 8844  
8847 8845          if (pp_head == NULL)
8848 8846                  return;
8849 8847  
8850 8848          if (mdi_mtc_off) {
8851 8849                  config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8852 8850                  free_phclient_path_list(pp_head);
8853 8851                  return;
8854 8852          }
8855 8853  
8856 8854          newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8857 8855          ASSERT(newacc);
8858 8856  
8859 8857          mutex_enter(&vhc->vhc_lock);
8860 8858          for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8861 8859                  if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8862 8860                      strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8863 8861                          free_async_client_config(newacc);
8864 8862                          mutex_exit(&vhc->vhc_lock);
8865 8863                          return;
8866 8864                  }
8867 8865          }
8868 8866  
8869 8867          if (vhc->vhc_acc_list_head == NULL)
8870 8868                  vhc->vhc_acc_list_head = newacc;
8871 8869          else
8872 8870                  vhc->vhc_acc_list_tail->acc_next = newacc;
8873 8871          vhc->vhc_acc_list_tail = newacc;
8874 8872          vhc->vhc_acc_count++;
8875 8873          if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8876 8874                  cv_broadcast(&vhc->vhc_cv);
8877 8875                  create_thread = 0;
8878 8876          } else {
8879 8877                  vhc->vhc_acc_thrcount++;
8880 8878                  create_thread = 1;
8881 8879          }
8882 8880          mutex_exit(&vhc->vhc_lock);
8883 8881  
8884 8882          if (create_thread)
8885 8883                  (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8886 8884                      0, &p0, TS_RUN, minclsyspri);
8887 8885  }
8888 8886  
8889 8887  /*
8890 8888   * Return number of online paths for the specified client.
8891 8889   */
8892 8890  static int
8893 8891  nonline_paths(mdi_vhcache_client_t *cct)
8894 8892  {
8895 8893          mdi_vhcache_pathinfo_t *cpi;
8896 8894          int online_count = 0;
8897 8895  
8898 8896          for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8899 8897                  if (cpi->cpi_pip != NULL) {
8900 8898                          MDI_PI_LOCK(cpi->cpi_pip);
8901 8899                          if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8902 8900                                  online_count++;
8903 8901                          MDI_PI_UNLOCK(cpi->cpi_pip);
8904 8902                  }
8905 8903          }
8906 8904  
8907 8905          return (online_count);
8908 8906  }
8909 8907  
8910 8908  /*
8911 8909   * Bus configure all paths for the specified vhci client.
8912 8910   * If at least one path for the client is already online, the remaining paths
8913 8911   * will be configured asynchronously. Otherwise, it synchronously configures
8914 8912   * the paths until at least one path is online and then rest of the paths
8915 8913   * will be configured asynchronously.
8916 8914   */
8917 8915  static void
8918 8916  config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8919 8917  {
8920 8918          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8921 8919          mdi_phys_path_t *pp_head, *pp;
8922 8920          mdi_vhcache_client_t *cct;
8923 8921          mdi_vhcache_lookup_token_t tok;
8924 8922  
8925 8923          ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8926 8924  
8927 8925          init_vhcache_lookup_token(&tok, NULL);
8928 8926  
8929 8927          if (ct_name == NULL || ct_addr == NULL ||
8930 8928              (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8931 8929              == NULL ||
8932 8930              (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8933 8931                  rw_exit(&vhcache->vhcache_lock);
8934 8932                  return;
8935 8933          }
8936 8934  
8937 8935          /* if at least one path is online, configure the rest asynchronously */
8938 8936          if (nonline_paths(cct) > 0) {
8939 8937                  rw_exit(&vhcache->vhcache_lock);
8940 8938                  config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8941 8939                  return;
8942 8940          }
8943 8941  
8944 8942          rw_exit(&vhcache->vhcache_lock);
8945 8943  
8946 8944          for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8947 8945                  if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8948 8946                          rw_enter(&vhcache->vhcache_lock, RW_READER);
8949 8947  
8950 8948                          if ((cct = lookup_vhcache_client(vhcache, ct_name,
8951 8949                              ct_addr, &tok)) == NULL) {
8952 8950                                  rw_exit(&vhcache->vhcache_lock);
8953 8951                                  goto out;
8954 8952                          }
8955 8953  
8956 8954                          if (nonline_paths(cct) > 0 &&
8957 8955                              pp->phys_path_next != NULL) {
8958 8956                                  rw_exit(&vhcache->vhcache_lock);
8959 8957                                  config_client_paths_async(vhc, ct_name, ct_addr,
8960 8958                                      pp->phys_path_next, &tok);
8961 8959                                  pp->phys_path_next = NULL;
8962 8960                                  goto out;
8963 8961                          }
8964 8962  
8965 8963                          rw_exit(&vhcache->vhcache_lock);
8966 8964                  }
8967 8965          }
8968 8966  
8969 8967          adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8970 8968  out:
8971 8969          free_phclient_path_list(pp_head);
8972 8970  }
8973 8971  
8974 8972  static void
8975 8973  single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8976 8974  {
8977 8975          mutex_enter(&vhc->vhc_lock);
8978 8976          while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8979 8977                  cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8980 8978          vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8981 8979          mutex_exit(&vhc->vhc_lock);
8982 8980  }
8983 8981  
8984 8982  static void
8985 8983  single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8986 8984  {
8987 8985          mutex_enter(&vhc->vhc_lock);
8988 8986          vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8989 8987          cv_broadcast(&vhc->vhc_cv);
8990 8988          mutex_exit(&vhc->vhc_lock);
8991 8989  }
8992 8990  
/* One entry of a built-in phci driver table. */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
};

/* braced per element, like the scsi table above */
static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor", 1 }
};
9015 9013  
9016 9014  static void *
9017 9015  mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9018 9016  {
9019 9017          void *new_ptr;
9020 9018  
9021 9019          new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9022 9020          if (old_ptr) {
9023 9021                  bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9024 9022                  kmem_free(old_ptr, old_size);
9025 9023          }
9026 9024          return (new_ptr);
9027 9025  }
9028 9026  
9029 9027  static void
9030 9028  add_to_phci_list(char ***driver_list, int **root_support_list,
9031 9029      int *cur_elements, int *max_elements, char *driver_name, int root_support)
9032 9030  {
9033 9031          ASSERT(*cur_elements <= *max_elements);
9034 9032          if (*cur_elements == *max_elements) {
9035 9033                  *max_elements += 10;
9036 9034                  *driver_list = mdi_realloc(*driver_list,
9037 9035                      sizeof (char *) * (*cur_elements),
9038 9036                      sizeof (char *) * (*max_elements));
9039 9037                  *root_support_list = mdi_realloc(*root_support_list,
9040 9038                      sizeof (int) * (*cur_elements),
9041 9039                      sizeof (int) * (*max_elements));
9042 9040          }
9043 9041          (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9044 9042          (*root_support_list)[*cur_elements] = root_support;
9045 9043          (*cur_elements)++;
9046 9044  }
9047 9045  
9048 9046  static void
9049 9047  get_phci_driver_list(char *vhci_class, char ***driver_list,
9050 9048      int **root_support_list, int *cur_elements, int *max_elements)
9051 9049  {
9052 9050          mdi_phci_driver_info_t  *st_driver_list, *p;
9053 9051          int             st_ndrivers, root_support, i, j, driver_conf_count;
9054 9052          major_t         m;
9055 9053          struct devnames *dnp;
9056 9054          ddi_prop_t      *propp;
9057 9055  
9058 9056          *driver_list = NULL;
9059 9057          *root_support_list = NULL;
9060 9058          *cur_elements = 0;
9061 9059          *max_elements = 0;
9062 9060  
9063 9061          /* add the phci drivers derived from the phci driver.conf files */
9064 9062          for (m = 0; m < devcnt; m++) {
9065 9063                  dnp = &devnamesp[m];
9066 9064  
9067 9065                  if (dnp->dn_flags & DN_PHCI_DRIVER) {
9068 9066                          LOCK_DEV_OPS(&dnp->dn_lock);
9069 9067                          if (dnp->dn_global_prop_ptr != NULL &&
9070 9068                              (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9071 9069                              DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9072 9070                              &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9073 9071                              strcmp(propp->prop_val, vhci_class) == 0) {
9074 9072  
9075 9073                                  root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9076 9074                                      DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9077 9075                                      &dnp->dn_global_prop_ptr->prop_list)
9078 9076                                      == NULL) ? 1 : 0;
9079 9077  
9080 9078                                  add_to_phci_list(driver_list, root_support_list,
9081 9079                                      cur_elements, max_elements, dnp->dn_name,
9082 9080                                      root_support);
9083 9081  
9084 9082                                  UNLOCK_DEV_OPS(&dnp->dn_lock);
9085 9083                          } else
9086 9084                                  UNLOCK_DEV_OPS(&dnp->dn_lock);
9087 9085                  }
9088 9086          }
9089 9087  
9090 9088          driver_conf_count = *cur_elements;
9091 9089  
9092 9090          /* add the phci drivers specified in the built-in tables */
9093 9091          if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9094 9092                  st_driver_list = scsi_phci_driver_list;
9095 9093                  st_ndrivers = sizeof (scsi_phci_driver_list) /
9096 9094                      sizeof (mdi_phci_driver_info_t);
9097 9095          } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9098 9096                  st_driver_list = ib_phci_driver_list;
9099 9097                  st_ndrivers = sizeof (ib_phci_driver_list) /
9100 9098                      sizeof (mdi_phci_driver_info_t);
9101 9099          } else {
9102 9100                  st_driver_list = NULL;
9103 9101                  st_ndrivers = 0;
9104 9102          }
9105 9103  
9106 9104          for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9107 9105                  /* add this phci driver if not already added before */
9108 9106                  for (j = 0; j < driver_conf_count; j++) {
9109 9107                          if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9110 9108                                  break;
9111 9109                  }
9112 9110                  if (j == driver_conf_count) {
9113 9111                          add_to_phci_list(driver_list, root_support_list,
9114 9112                              cur_elements, max_elements, p->phdriver_name,
9115 9113                              p->phdriver_root_support);
9116 9114                  }
9117 9115          }
9118 9116  }
9119 9117  
9120 9118  /*
9121 9119   * Attach the phci driver instances associated with the specified vhci class.
9122 9120   * If root is mounted attach all phci driver instances.
9123 9121   * If root is not mounted, attach the instances of only those phci
9124 9122   * drivers that have the root support.
9125 9123   */
9126 9124  static void
9127 9125  attach_phci_drivers(char *vhci_class)
9128 9126  {
9129 9127          char    **driver_list, **p;
9130 9128          int     *root_support_list;
9131 9129          int     cur_elements, max_elements, i;
9132 9130          major_t m;
9133 9131  
9134 9132          get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9135 9133              &cur_elements, &max_elements);
9136 9134  
9137 9135          for (i = 0; i < cur_elements; i++) {
9138 9136                  if (modrootloaded || root_support_list[i]) {
9139 9137                          m = ddi_name_to_major(driver_list[i]);
9140 9138                          if (m != DDI_MAJOR_T_NONE &&
9141 9139                              ddi_hold_installed_driver(m))
9142 9140                                  ddi_rele_driver(m);
9143 9141                  }
9144 9142          }
9145 9143  
9146 9144          if (driver_list) {
9147 9145                  for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9148 9146                          kmem_free(*p, strlen(*p) + 1);
9149 9147                  kmem_free(driver_list, sizeof (char *) * max_elements);
9150 9148                  kmem_free(root_support_list, sizeof (int) * max_elements);
9151 9149          }
9152 9150  }
9153 9151  
9154 9152  /*
9155 9153   * Build vhci cache:
9156 9154   *
9157 9155   * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9158 9156   * the phci driver instances. During this process the cache gets built.
9159 9157   *
9160 9158   * Cache is built fully if the root is mounted.
9161 9159   * If the root is not mounted, phci drivers that do not have root support
9162 9160   * are not attached. As a result the cache is built partially. The entries
9163 9161   * in the cache reflect only those phci drivers that have root support.
9164 9162   */
9165 9163  static int
9166 9164  build_vhci_cache(mdi_vhci_t *vh)
9167 9165  {
9168 9166          mdi_vhci_config_t *vhc = vh->vh_config;
9169 9167          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9170 9168  
9171 9169          single_threaded_vhconfig_enter(vhc);
9172 9170  
9173 9171          rw_enter(&vhcache->vhcache_lock, RW_READER);
9174 9172          if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9175 9173                  rw_exit(&vhcache->vhcache_lock);
9176 9174                  single_threaded_vhconfig_exit(vhc);
9177 9175                  return (0);
9178 9176          }
9179 9177          rw_exit(&vhcache->vhcache_lock);
9180 9178  
9181 9179          attach_phci_drivers(vh->vh_class);
9182 9180          bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9183 9181              BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9184 9182  
9185 9183          rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9186 9184          vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9187 9185          rw_exit(&vhcache->vhcache_lock);
9188 9186  
9189 9187          single_threaded_vhconfig_exit(vhc);
9190 9188          vhcache_dirty(vhc);
9191 9189          return (1);
9192 9190  }
9193 9191  
9194 9192  /*
9195 9193   * Determine if discovery of paths is needed.
9196 9194   */
9197 9195  static int
9198 9196  vhcache_do_discovery(mdi_vhci_config_t *vhc)
9199 9197  {
9200 9198          int rv = 1;
9201 9199  
9202 9200          mutex_enter(&vhc->vhc_lock);
9203 9201          if (i_ddi_io_initialized() == 0) {
9204 9202                  if (vhc->vhc_path_discovery_boot > 0) {
9205 9203                          vhc->vhc_path_discovery_boot--;
9206 9204                          goto out;
9207 9205                  }
9208 9206          } else {
9209 9207                  if (vhc->vhc_path_discovery_postboot > 0) {
9210 9208                          vhc->vhc_path_discovery_postboot--;
9211 9209                          goto out;
9212 9210                  }
9213 9211          }
9214 9212  
9215 9213          /*
9216 9214           * Do full path discovery at most once per mdi_path_discovery_interval.
9217 9215           * This is to avoid a series of full path discoveries when opening
9218 9216           * stale /dev/[r]dsk links.
9219 9217           */
9220 9218          if (mdi_path_discovery_interval != -1 &&
9221 9219              ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9222 9220                  goto out;
9223 9221  
9224 9222          rv = 0;
9225 9223  out:
9226 9224          mutex_exit(&vhc->vhc_lock);
9227 9225          return (rv);
9228 9226  }
9229 9227  
9230 9228  /*
9231 9229   * Discover all paths:
9232 9230   *
9233 9231   * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9234 9232   * driver instances. During this process all paths will be discovered.
9235 9233   */
9236 9234  static int
9237 9235  vhcache_discover_paths(mdi_vhci_t *vh)
9238 9236  {
9239 9237          mdi_vhci_config_t *vhc = vh->vh_config;
9240 9238          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9241 9239          int rv = 0;
9242 9240  
9243 9241          single_threaded_vhconfig_enter(vhc);
9244 9242  
9245 9243          if (vhcache_do_discovery(vhc)) {
9246 9244                  attach_phci_drivers(vh->vh_class);
9247 9245                  bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9248 9246                      NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9249 9247  
9250 9248                  mutex_enter(&vhc->vhc_lock);
9251 9249                  vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9252 9250                      mdi_path_discovery_interval * TICKS_PER_SECOND;
9253 9251                  mutex_exit(&vhc->vhc_lock);
9254 9252                  rv = 1;
9255 9253          }
9256 9254  
9257 9255          single_threaded_vhconfig_exit(vhc);
9258 9256          return (rv);
9259 9257  }
9260 9258  
9261 9259  /*
9262 9260   * Generic vhci bus config implementation:
9263 9261   *
9264 9262   * Parameters
9265 9263   *      vdip    vhci dip
9266 9264   *      flags   bus config flags
9267 9265   *      op      bus config operation
9268 9266   *      The remaining parameters are bus config operation specific
9269 9267   *
9270 9268   * for BUS_CONFIG_ONE
9271 9269   *      arg     pointer to name@addr
9272 9270   *      child   upon successful return from this function, *child will be
9273 9271   *              set to the configured and held devinfo child node of vdip.
9274 9272   *      ct_addr pointer to client address (i.e. GUID)
9275 9273   *
9276 9274   * for BUS_CONFIG_DRIVER
9277 9275   *      arg     major number of the driver
9278 9276   *      child and ct_addr parameters are ignored
9279 9277   *
9280 9278   * for BUS_CONFIG_ALL
9281 9279   *      arg, child, and ct_addr parameters are ignored
9282 9280   *
9283 9281   * Note that for the rest of the bus config operations, this function simply
9284 9282   * calls the framework provided default bus config routine.
9285 9283   */
9286 9284  int
9287 9285  mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9288 9286      void *arg, dev_info_t **child, char *ct_addr)
9289 9287  {
9290 9288          mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9291 9289          mdi_vhci_config_t *vhc = vh->vh_config;
9292 9290          mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9293 9291          int rv = 0;
9294 9292          int params_valid = 0;
9295 9293          char *cp;
9296 9294  
9297 9295          /*
9298 9296           * To bus config vhcis we relay operation, possibly using another
9299 9297           * thread, to phcis. The phci driver then interacts with MDI to cause
9300 9298           * vhci child nodes to be enumerated under the vhci node.  Adding a
9301 9299           * vhci child requires an ndi_devi_enter of the vhci. Since another
9302 9300           * thread may be adding the child, to avoid deadlock we can't wait
9303 9301           * for the relayed operations to complete if we have already entered
9304 9302           * the vhci node.
9305 9303           */
9306 9304          if (DEVI_BUSY_OWNED(vdip)) {
9307 9305                  MDI_DEBUG(2, (MDI_NOTE, vdip,
9308 9306                      "vhci dip is busy owned %p", (void *)vdip));
9309 9307                  goto default_bus_config;
9310 9308          }
9311 9309  
9312 9310          rw_enter(&vhcache->vhcache_lock, RW_READER);
9313 9311          if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9314 9312                  rw_exit(&vhcache->vhcache_lock);
9315 9313                  rv = build_vhci_cache(vh);
9316 9314                  rw_enter(&vhcache->vhcache_lock, RW_READER);
9317 9315          }
9318 9316  
9319 9317          switch (op) {
9320 9318          case BUS_CONFIG_ONE:
9321 9319                  if (arg != NULL && ct_addr != NULL) {
9322 9320                          /* extract node name */
9323 9321                          cp = (char *)arg;
9324 9322                          while (*cp != '\0' && *cp != '@')
9325 9323                                  cp++;
9326 9324                          if (*cp == '@') {
9327 9325                                  params_valid = 1;
9328 9326                                  *cp = '\0';
9329 9327                                  config_client_paths(vhc, (char *)arg, ct_addr);
9330 9328                                  /* config_client_paths() releases cache_lock */
9331 9329                                  *cp = '@';
9332 9330                                  break;
9333 9331                          }
9334 9332                  }
9335 9333  
9336 9334                  rw_exit(&vhcache->vhcache_lock);
9337 9335                  break;
9338 9336  
9339 9337          case BUS_CONFIG_DRIVER:
9340 9338                  rw_exit(&vhcache->vhcache_lock);
9341 9339                  if (rv == 0)
9342 9340                          st_bus_config_all_phcis(vhc, flags, op,
9343 9341                              (major_t)(uintptr_t)arg);
9344 9342                  break;
9345 9343  
9346 9344          case BUS_CONFIG_ALL:
9347 9345                  rw_exit(&vhcache->vhcache_lock);
9348 9346                  if (rv == 0)
9349 9347                          st_bus_config_all_phcis(vhc, flags, op, -1);
9350 9348                  break;
9351 9349  
9352 9350          default:
9353 9351                  rw_exit(&vhcache->vhcache_lock);
9354 9352                  break;
9355 9353          }
9356 9354  
9357 9355  
9358 9356  default_bus_config:
9359 9357          /*
9360 9358           * All requested child nodes are enumerated under the vhci.
9361 9359           * Now configure them.
9362 9360           */
9363 9361          if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9364 9362              NDI_SUCCESS) {
9365 9363                  return (MDI_SUCCESS);
9366 9364          } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9367 9365                  /* discover all paths and try configuring again */
9368 9366                  if (vhcache_discover_paths(vh) &&
9369 9367                      ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9370 9368                      NDI_SUCCESS)
9371 9369                          return (MDI_SUCCESS);
9372 9370          }
9373 9371  
9374 9372          return (MDI_FAILURE);
9375 9373  }
9376 9374  
9377 9375  /*
9378 9376   * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9379 9377   */
9380 9378  static nvlist_t *
9381 9379  read_on_disk_vhci_cache(char *vhci_class)
9382 9380  {
9383 9381          nvlist_t *nvl;
9384 9382          int err;
9385 9383          char *filename;
9386 9384  
9387 9385          filename = vhclass2vhcache_filename(vhci_class);
9388 9386  
9389 9387          if ((err = fread_nvlist(filename, &nvl)) == 0) {
9390 9388                  kmem_free(filename, strlen(filename) + 1);
9391 9389                  return (nvl);
9392 9390          } else if (err == EIO)
9393 9391                  cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9394 9392          else if (err == EINVAL)
9395 9393                  cmn_err(CE_WARN,
9396 9394                      "%s: data file corrupted, will recreate", filename);
9397 9395  
9398 9396          kmem_free(filename, strlen(filename) + 1);
9399 9397          return (NULL);
9400 9398  }
9401 9399  
9402 9400  /*
9403 9401   * Read on-disk vhci cache into nvlists for all vhci classes.
9404 9402   * Called during booting by i_ddi_read_devices_files().
9405 9403   */
9406 9404  void
9407 9405  mdi_read_devices_files(void)
9408 9406  {
9409 9407          int i;
9410 9408  
9411 9409          for (i = 0; i < N_VHCI_CLASSES; i++)
9412 9410                  vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9413 9411  }
9414 9412  
9415 9413  /*
9416 9414   * Remove all stale entries from vhci cache.
9417 9415   */
9418 9416  static void
9419 9417  clean_vhcache(mdi_vhci_config_t *vhc)
9420 9418  {
9421 9419          mdi_vhci_cache_t        *vhcache = &vhc->vhc_vhcache;
9422 9420          mdi_vhcache_phci_t      *phci, *nxt_phci;
9423 9421          mdi_vhcache_client_t    *client, *nxt_client;
9424 9422          mdi_vhcache_pathinfo_t  *path, *nxt_path;
9425 9423  
9426 9424          rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9427 9425  
9428 9426          client = vhcache->vhcache_client_head;
9429 9427          vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9430 9428          for ( ; client != NULL; client = nxt_client) {
9431 9429                  nxt_client = client->cct_next;
9432 9430  
9433 9431                  path = client->cct_cpi_head;
9434 9432                  client->cct_cpi_head = client->cct_cpi_tail = NULL;
9435 9433                  for ( ; path != NULL; path = nxt_path) {
9436 9434                          nxt_path = path->cpi_next;
9437 9435                          if ((path->cpi_cphci->cphci_phci != NULL) &&
9438 9436                              (path->cpi_pip != NULL)) {
9439 9437                                  enqueue_tail_vhcache_pathinfo(client, path);
9440 9438                          } else if (path->cpi_pip != NULL) {
9441 9439                                  /* Not valid to have a path without a phci. */
9442 9440                                  free_vhcache_pathinfo(path);
9443 9441                          }
9444 9442                  }
9445 9443  
9446 9444                  if (client->cct_cpi_head != NULL)
9447 9445                          enqueue_vhcache_client(vhcache, client);
9448 9446                  else {
9449 9447                          (void) mod_hash_destroy(vhcache->vhcache_client_hash,
9450 9448                              (mod_hash_key_t)client->cct_name_addr);
9451 9449                          free_vhcache_client(client);
9452 9450                  }
9453 9451          }
9454 9452  
9455 9453          phci = vhcache->vhcache_phci_head;
9456 9454          vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9457 9455          for ( ; phci != NULL; phci = nxt_phci) {
9458 9456  
9459 9457                  nxt_phci = phci->cphci_next;
9460 9458                  if (phci->cphci_phci != NULL)
9461 9459                          enqueue_vhcache_phci(vhcache, phci);
9462 9460                  else
9463 9461                          free_vhcache_phci(phci);
9464 9462          }
9465 9463  
9466 9464          vhcache->vhcache_clean_time = ddi_get_lbolt64();
9467 9465          rw_exit(&vhcache->vhcache_lock);
9468 9466          vhcache_dirty(vhc);
9469 9467  }
9470 9468  
9471 9469  /*
9472 9470   * Remove all stale entries from vhci cache.
9473 9471   * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9474 9472   */
9475 9473  void
9476 9474  mdi_clean_vhcache(void)
9477 9475  {
9478 9476          mdi_vhci_t *vh;
9479 9477  
9480 9478          mutex_enter(&mdi_mutex);
9481 9479          for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9482 9480                  vh->vh_refcnt++;
9483 9481                  mutex_exit(&mdi_mutex);
9484 9482                  clean_vhcache(vh->vh_config);
9485 9483                  mutex_enter(&mdi_mutex);
9486 9484                  vh->vh_refcnt--;
9487 9485          }
9488 9486          mutex_exit(&mdi_mutex);
9489 9487  }
9490 9488  
9491 9489  /*
9492 9490   * mdi_vhci_walk_clients():
9493 9491   *              Walker routine to traverse client dev_info nodes
9494 9492   * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9495 9493   * below the client, including nexus devices, which we dont want.
9496 9494   * So we just traverse the immediate siblings, starting from 1st client.
9497 9495   */
9498 9496  void
9499 9497  mdi_vhci_walk_clients(dev_info_t *vdip,
9500 9498      int (*f)(dev_info_t *, void *), void *arg)
9501 9499  {
9502 9500          mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9503 9501          dev_info_t      *cdip;
9504 9502          mdi_client_t    *ct;
9505 9503  
9506 9504          MDI_VHCI_CLIENT_LOCK(vh);
9507 9505          cdip = ddi_get_child(vdip);
9508 9506          while (cdip) {
9509 9507                  ct = i_devi_get_client(cdip);
9510 9508                  MDI_CLIENT_LOCK(ct);
9511 9509  
9512 9510                  if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9513 9511                          cdip = ddi_get_next_sibling(cdip);
9514 9512                  else
9515 9513                          cdip = NULL;
9516 9514  
9517 9515                  MDI_CLIENT_UNLOCK(ct);
9518 9516          }
9519 9517          MDI_VHCI_CLIENT_UNLOCK(vh);
9520 9518  }
9521 9519  
9522 9520  /*
9523 9521   * mdi_vhci_walk_phcis():
9524 9522   *              Walker routine to traverse phci dev_info nodes
9525 9523   */
9526 9524  void
9527 9525  mdi_vhci_walk_phcis(dev_info_t *vdip,
9528 9526      int (*f)(dev_info_t *, void *), void *arg)
9529 9527  {
9530 9528          mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9531 9529          mdi_phci_t      *ph, *next;
9532 9530  
9533 9531          MDI_VHCI_PHCI_LOCK(vh);
9534 9532          ph = vh->vh_phci_head;
9535 9533          while (ph) {
9536 9534                  MDI_PHCI_LOCK(ph);
9537 9535  
9538 9536                  if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9539 9537                          next = ph->ph_next;
9540 9538                  else
9541 9539                          next = NULL;
9542 9540  
9543 9541                  MDI_PHCI_UNLOCK(ph);
9544 9542                  ph = next;
9545 9543          }
9546 9544          MDI_VHCI_PHCI_UNLOCK(vh);
9547 9545  }
9548 9546  
9549 9547  
9550 9548  /*
9551 9549   * mdi_walk_vhcis():
9552 9550   *              Walker routine to traverse vhci dev_info nodes
9553 9551   */
9554 9552  void
9555 9553  mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9556 9554  {
9557 9555          mdi_vhci_t      *vh = NULL;
9558 9556  
9559 9557          mutex_enter(&mdi_mutex);
9560 9558          /*
9561 9559           * Scan for already registered vhci
9562 9560           */
9563 9561          for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9564 9562                  vh->vh_refcnt++;
9565 9563                  mutex_exit(&mdi_mutex);
9566 9564                  if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9567 9565                          mutex_enter(&mdi_mutex);
9568 9566                          vh->vh_refcnt--;
9569 9567                          break;
9570 9568                  } else {
9571 9569                          mutex_enter(&mdi_mutex);
9572 9570                          vh->vh_refcnt--;
9573 9571                  }
9574 9572          }
9575 9573  
9576 9574          mutex_exit(&mdi_mutex);
9577 9575  }
9578 9576  
9579 9577  /*
9580 9578   * i_mdi_log_sysevent():
9581 9579   *              Logs events for pickup by syseventd
9582 9580   */
9583 9581  static void
9584 9582  i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9585 9583  {
9586 9584          char            *path_name;
9587 9585          nvlist_t        *attr_list;
9588 9586  
9589 9587          if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9590 9588              KM_SLEEP) != DDI_SUCCESS) {
9591 9589                  goto alloc_failed;
9592 9590          }
9593 9591  
9594 9592          path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9595 9593          (void) ddi_pathname(dip, path_name);
9596 9594  
9597 9595          if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9598 9596              ddi_driver_name(dip)) != DDI_SUCCESS) {
9599 9597                  goto error;
9600 9598          }
9601 9599  
9602 9600          if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9603 9601              (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9604 9602                  goto error;
9605 9603          }
9606 9604  
9607 9605          if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9608 9606              (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9609 9607                  goto error;
9610 9608          }
9611 9609  
9612 9610          if (nvlist_add_string(attr_list, DDI_PATHNAME,
9613 9611              path_name) != DDI_SUCCESS) {
9614 9612                  goto error;
9615 9613          }
9616 9614  
9617 9615          if (nvlist_add_string(attr_list, DDI_CLASS,
9618 9616              ph_vh_class) != DDI_SUCCESS) {
9619 9617                  goto error;
9620 9618          }
9621 9619  
9622 9620          (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9623 9621              attr_list, NULL, DDI_SLEEP);
9624 9622  
9625 9623  error:
9626 9624          kmem_free(path_name, MAXPATHLEN);
9627 9625          nvlist_free(attr_list);
9628 9626          return;
9629 9627  
9630 9628  alloc_failed:
9631 9629          MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9632 9630  }
9633 9631  
9634 9632  char **
9635 9633  mdi_get_phci_driver_list(char *vhci_class, int  *ndrivers)
9636 9634  {
9637 9635          char    **driver_list, **ret_driver_list = NULL;
9638 9636          int     *root_support_list;
9639 9637          int     cur_elements, max_elements;
9640 9638  
9641 9639          get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9642 9640              &cur_elements, &max_elements);
9643 9641  
9644 9642  
9645 9643          if (driver_list) {
9646 9644                  kmem_free(root_support_list, sizeof (int) * max_elements);
9647 9645                  ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9648 9646                      * max_elements, sizeof (char *) * cur_elements);
9649 9647          }
9650 9648          *ndrivers = cur_elements;
9651 9649  
9652 9650          return (ret_driver_list);
9653 9651  
9654 9652  }
9655 9653  
9656 9654  void
9657 9655  mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9658 9656  {
9659 9657          char    **p;
9660 9658          int     i;
9661 9659  
9662 9660          if (driver_list) {
9663 9661                  for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9664 9662                          kmem_free(*p, strlen(*p) + 1);
9665 9663                  kmem_free(driver_list, sizeof (char *) * ndrivers);
9666 9664          }
9667 9665  }
9668 9666  
9669 9667  /*
9670 9668   * mdi_is_dev_supported():
9671 9669   *              function called by pHCI bus config operation to determine if a
9672 9670   *              device should be represented as a child of the vHCI or the
9673 9671   *              pHCI.  This decision is made by the vHCI, using cinfo idenity
9674 9672   *              information passed by the pHCI - specifics of the cinfo
9675 9673   *              representation are by agreement between the pHCI and vHCI.
9676 9674   * Return Values:
9677 9675   *              MDI_SUCCESS
9678 9676   *              MDI_FAILURE
9679 9677   */
9680 9678  int
9681 9679  mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9682 9680  {
9683 9681          mdi_vhci_t      *vh;
9684 9682  
9685 9683          ASSERT(class && pdip);
9686 9684  
9687 9685          /*
9688 9686           * For dev_supported, mdi_phci_register() must have established pdip as
9689 9687           * a pHCI.
9690 9688           *
9691 9689           * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9692 9690           * MDI_PHCI(pdip) will return false if mpxio is disabled.
9693 9691           */
9694 9692          if (!MDI_PHCI(pdip))
9695 9693                  return (MDI_FAILURE);
9696 9694  
9697 9695          /* Return MDI_FAILURE if vHCI does not support asking the question. */
9698 9696          vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9699 9697          if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9700 9698                  return (MDI_FAILURE);
9701 9699          }
9702 9700  
9703 9701          /* Return vHCI answer */
9704 9702          return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9705 9703  }
9706 9704  
9707 9705  int
9708 9706  mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9709 9707  {
9710 9708          uint_t devstate = 0;
9711 9709          dev_info_t *cdip;
9712 9710  
9713 9711          if ((pip == NULL) || (dcp == NULL))
9714 9712                  return (MDI_FAILURE);
9715 9713  
9716 9714          cdip = mdi_pi_get_client(pip);
9717 9715  
9718 9716          switch (mdi_pi_get_state(pip)) {
9719 9717          case MDI_PATHINFO_STATE_INIT:
9720 9718                  devstate = DEVICE_DOWN;
9721 9719                  break;
9722 9720          case MDI_PATHINFO_STATE_ONLINE:
9723 9721                  devstate = DEVICE_ONLINE;
9724 9722                  if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9725 9723                          devstate |= DEVICE_BUSY;
9726 9724                  break;
9727 9725          case MDI_PATHINFO_STATE_STANDBY:
9728 9726                  devstate = DEVICE_ONLINE;
9729 9727                  break;
9730 9728          case MDI_PATHINFO_STATE_FAULT:
9731 9729                  devstate = DEVICE_DOWN;
9732 9730                  break;
9733 9731          case MDI_PATHINFO_STATE_OFFLINE:
9734 9732                  devstate = DEVICE_OFFLINE;
9735 9733                  break;
9736 9734          default:
9737 9735                  ASSERT(MDI_PI(pip)->pi_state);
9738 9736          }
9739 9737  
9740 9738          if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9741 9739                  return (MDI_FAILURE);
9742 9740  
9743 9741          return (MDI_SUCCESS);
9744 9742  }
  
    | 
      ↓ open down ↓ | 
    3613 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX