1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  27  */
  28 
  29 #ifndef _SYS_MDI_IMPLDEFS_H
  30 #define _SYS_MDI_IMPLDEFS_H
  31 
  32 
  33 #include <sys/note.h>
  34 #include <sys/types.h>
  35 #include <sys/sunmdi.h>
  36 #include <sys/modhash.h>
  37 #include <sys/callb.h>
  38 #include <sys/devctl.h>
  39 
  40 #ifdef  __cplusplus
  41 extern "C" {
  42 #endif
  43 
  44 #ifdef _KERNEL
  45 
  46 /*
  47  * Multipath Driver Interfaces
  48  *
  49  * The multipathing framework is provided in two modules.  The 'mpxio' misc.
  50  * module provides the core multipath framework and the 'scsi_vhci' nexus
  51  * driver provides the SCSI-III command set driver functionality for
  52  * managing Fibre-Channel storage devices.
  53  *
  54  * As in any multipathing solution there are three major problems to solve:
  55  *
  56  * 1) Identification and enumeration of multipath client devices.
  57  * 2) Optimal path selection when routing I/O requests.
  58  * 3) Observability interfaces to snapshot the multipath configuration,
  59  *    and infrastructure to provide performance and error statistics.
  60  *
  61  * The mpxio framework consists of several major components:
  62  *
  63  * 1) The MDI is the Multiplexed Device Interface; this is the core glue which
  64  *    holds the following components together.
  65  * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing
  66  *    services for a given bus technology (example: 'scsi_vhci' provides
  67  *    multipathing support for SCSI-III fibre-channel devices).
  68  * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport
  69  *    services for a given host controller (example: 'fcp' provides transport
  70  *    for fibre-channel devices).
  71  * 4) Client Devices are standard Solaris target (or leaf) drivers
  72  *    (example: 'ssd' is the standard disk driver for fibre-channel arrays).
  73  * 5) Multipath information nodes ('pathinfo' nodes) connect client device
  74  *    nodes and pHCI device nodes in the device tree.
  75  *
  76  * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might
  77  * look like this:
  78  *
  79  *              /\
  80  *             /  ............
  81  *     <vHCI>:/               \
  82  *      +-----------+   +-----------+
  83  *      | scsi_vhci |   |  pci@1f,0 |
  84  *      +-----------+   +-----------+
  85  *            /   \               \
  86  * <Client>: /     \ :<Client>     \ :parent(pHCI)
  87  *  +----------+ +-----------+    +-------------+
  88  *  | ssd 1    | | ssd 2     |    | qlc@0,0     |
  89  *  +----------+ +-----------+    +-------------+
  90  *   |            |                /        \
  91  *   |            |       <pHCI>: /          \ :<pHCI>
  92  *   |            |      +-------------+   +-------------+
  93  *   |            |      | pHCI 1 (fp) |   | pHCI 2 (fp) |
  94  *   |            |      +-------------+   +-------------+
  95  *   |            |          /        |      /          |
  96  *   |            |    +------+       |    +------+     |
  97  *   |            |    | ssd 3|       |    | ssd  |     |
  98  *   |            |    |!mpxio|       |    | (OBP)|     |
  99  *   |            |    +------+       |    +------+     |
 100  *   |            |                   |                 |
 101  *   |            |       <pathinfo>: |                 |
 102  *   |            |               +-------+         +--------+
 103  *   |            +-------------->| path  |-------->| path   |
 104  *   |                            | info  |         | info   |
 105  *   |                            | node 1|         | node 3 |
 106  *   |                            +-------+         +--------+
 107  *   |                                |               |
 108  *   |                                |            +~~~~~~~~+
 109  *   |                            +-------+        :+--------+
 110  *   +--------------------------->| path  |-------->| path   |
 111  *                                | info  |        :| info   |
 112  *                                | node 2|        +| node 4 |
 113  *                                +-------+         +--------+
 114  *
 115  * The multipath information nodes (mdi_pathinfo nodes) establish the
 116  * relationship between the pseudo client driver instance nodes (children
 117  * of the vHCI) and the physical host controller interconnect (pHCI
 118  * drivers) forming a matrix structure.
 119  *
 120  * The mpxio module implements locking at multiple granularity levels to
 121  * support the needs of various consumers.  The multipath matrix can be
 122  * column locked, or row locked depending on the consumer. The intention
 123  * is to balance simplicity and performance.
 124  *
 125  * Locking:
 126  *
 127  * The devinfo locking still applies:
 128  *
 129  *   1) An ndi_devi_enter of a parent protects linkage/state of children.
 130  *   2) state >= DS_INITIALIZED adds devi_ref of parent
 131  *   3) devi_ref at state >= DS_ATTACHED prevents detach(9E).
 132  *
 133  * The ordering of 1) is (vHCI, pHCI). For a DEBUG kernel this ordering
 134  * is asserted by the ndi_devi_enter() implementation.  There is also an
 135  * ndi_devi_enter(Client), which is atypical since the client is a leaf.
 136  * This is done to synchronize pathinfo nodes during devinfo snapshot (see
 137  * di_register_pip) by pretending that the pathinfo nodes are children
 138  * of the client.
 139  *
 140  * In addition to devinfo locking the current implementation utilizes
 141  * the following locks:
 142  *
 143  *   mdi_mutex: protects the global list of vHCIs.
 144  *
 145  *   vh_phci_mutex: per-vHCI (mutex) lock: protects list of pHCIs registered
 146  *   with vHCI.
 147  *
 148  *   vh_client_mutex: per-vHCI (mutex) lock: protects list/hash of Clients
 149  *   associated with vHCI.
 150  *
 151  *   ph_mutex: per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo
 152  *   node list) and per-pHCI structure fields.  mdi_pathinfo node creation,
 153  *   deletion and child mdi_pathinfo node state changes are serialized on per
 154  *   pHCI basis (Protection against DR).
 155  *
 156  *   ct_mutex: per-client (mutex) lock: protects the row (client-mdi_pathinfo
 157  *   node list) and per-client structure fields.  The client-mdi_pathinfo node
 158  *   list is typically walked to select an optimal path when routing I/O
 159  *   requests.
 160  *
 161  *   pi_mutex: per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node
 162  *   structure fields.
 163  *
 164  * Note that per-Client structure and per-pHCI fields are freely readable when
 165  * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo
 166  * node guarantees that its corresponding client and pHCI devices will not be
 167  * freed.
 168  */
 169 
 170 /*
 171  * MDI Client global unique identifier property name string definition
 172  */
 173 extern const char                       *mdi_client_guid_prop;
 174 #define MDI_CLIENT_GUID_PROP            (char *)mdi_client_guid_prop
 175 
 176 /*
 177  * MDI Client load balancing policy definitions
 178  *
 179  * Load balancing policies are determined on a per-vHCI basis and are
 180  * configurable via the vHCI's driver.conf file.
 181  */
 182 typedef enum {
 183         LOAD_BALANCE_NONE,              /* Alternate pathing            */
 184         LOAD_BALANCE_RR,                /* Round Robin                  */
 185         LOAD_BALANCE_LBA                /* Logical Block Addressing     */
 186 } client_lb_t;
 187 
 188 typedef struct {
 189         int region_size;
 190 }client_lb_args_t;
 191 
 192 /*
 193  * MDI client load balancing property name/value string definitions
 194  */
 195 extern const char                       *mdi_load_balance;
 196 extern const char                       *mdi_load_balance_none;
 197 extern const char                       *mdi_load_balance_ap;
 198 extern const char                       *mdi_load_balance_rr;
 199 extern const char                       *mdi_load_balance_lba;
 200 
 201 #define LOAD_BALANCE_PROP               (char *)mdi_load_balance
 202 #define LOAD_BALANCE_PROP_NONE          (char *)mdi_load_balance_none
 203 #define LOAD_BALANCE_PROP_AP            (char *)mdi_load_balance_ap
 204 #define LOAD_BALANCE_PROP_RR            (char *)mdi_load_balance_rr
 205 #define LOAD_BALANCE_PROP_LBA           (char *)mdi_load_balance_lba
 206 
 207 /* default for region size */
 208 #define LOAD_BALANCE_DEFAULT_REGION_SIZE        18
 209 
 210 /*
 211  * vHCI drivers:
 212  *
 213  * vHCI drivers are pseudo nexus drivers which implement multipath services
 214  * for a specific command set or bus architecture ('class').  There is a
 215  * single instance of the vHCI driver for each command set which supports
 216  * multipath devices.
 217  *
 218  * Each vHCI driver registers the following callbacks from attach(9e).
 219  */
 220 #define MDI_VHCI_OPS_REV_1              1
 221 #define MDI_VHCI_OPS_REV                MDI_VHCI_OPS_REV_1
 222 
 223 typedef struct mdi_vhci_ops {
 224         /* revision management */
 225         int     vo_revision;
 226 
 227         /* mdi_pathinfo node init callback */
 228         int     (*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags);
 229 
 230         /* mdi_pathinfo node uninit callback */
 231         int     (*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip,
 232                     int flags);
 233 
 234         /* mdi_pathinfo node state change callback */
 235         int     (*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip,
 236                     mdi_pathinfo_state_t state, uint32_t, int flags);
 237 
 238         /* Client path failover callback */
 239         int     (*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags);
 240 
 241         /* Client attached callback */
 242         void    (*vo_client_attached)(dev_info_t *cdip);
 243 
 244         /* Ask vHCI if 'cinfo' device is support as a client */
 245         int     (*vo_is_dev_supported)(dev_info_t *vdip, dev_info_t *pdip,
 246                     void *cinfo);
 247 } mdi_vhci_ops_t;
 248 
 249 /*
 250  * An mdi_vhci structure is created and bound to the devinfo node of every
 251  * registered vHCI class driver; this happens when a vHCI registers itself from
 252  * attach(9e).  This structure is unbound and freed when the vHCI unregisters
 * at detach(9e) time.
 254  *
 255  * Each vHCI driver is associated with a vHCI class name; this is the handle
 256  * used to register and unregister pHCI drivers for a given transport.
 257  *
 258  * Locking: Different parts of this structure are guarded by different
 259  * locks: global threading of multiple vHCIs and initialization is protected
 260  * by mdi_mutex, the list of pHCIs associated with a vHCI is protected by
 261  * vh_phci_mutex, and Clients are protected by vh_client_mutex.
 262  *
 263  * XXX Depending on the context, some of the fields can be freely read without
 264  * holding any locks (ex. holding vh_client_mutex lock also guarantees that
 265  * the vHCI (parent) cannot be unexpectedly freed).
 266  */
 267 typedef struct mdi_vhci {
 268         /* protected by mdi_mutex... */
 269         struct mdi_vhci         *vh_next;       /* next vHCI link       */
 270         struct mdi_vhci         *vh_prev;       /* prev vHCI link       */
 271         char                    *vh_class;      /* vHCI class name      */
 272         dev_info_t              *vh_dip;        /* vHCI devi handle     */
 273         int                     vh_refcnt;      /* vHCI reference count */
 274         struct mdi_vhci_config  *vh_config;     /* vHCI config          */
 275         client_lb_t             vh_lb;          /* vHCI load-balancing  */
 276         struct mdi_vhci_ops     *vh_ops;        /* vHCI callback vectors */
 277 
 278         /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */
 279         kmutex_t                vh_phci_mutex;  /* pHCI mutex           */
 280         int                     vh_phci_count;  /* pHCI device count    */
 281         struct mdi_phci         *vh_phci_head;  /* pHCI list head       */
 282         struct mdi_phci         *vh_phci_tail;  /* pHCI list tail       */
 283 
 284         /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
 285         kmutex_t                vh_client_mutex; /* Client mutex        */
 286         int                     vh_client_count; /* Client count        */
 287         struct client_hash      *vh_client_table; /* Client hash        */
 288 } mdi_vhci_t;
 289 
 290 /*
 291  * per-vHCI lock macros
 292  */
 293 #define MDI_VHCI_PHCI_LOCK(vh)          mutex_enter(&(vh)->vh_phci_mutex)
 294 #define MDI_VHCI_PHCI_TRYLOCK(vh)       mutex_tryenter(&(vh)->vh_phci_mutex)
 295 #define MDI_VHCI_PHCI_UNLOCK(vh)        mutex_exit(&(vh)->vh_phci_mutex)
 296 #ifdef  DEBUG
 297 #define MDI_VHCI_PCHI_LOCKED(vh)        MUTEX_HELD(&(vh)->vh_phci_mutex)
 298 #endif  /* DEBUG */
 299 #define MDI_VHCI_CLIENT_LOCK(vh)        mutex_enter(&(vh)->vh_client_mutex)
 300 #define MDI_VHCI_CLIENT_TRYLOCK(vh)     mutex_tryenter(&(vh)->vh_client_mutex)
 301 #define MDI_VHCI_CLIENT_UNLOCK(vh)      mutex_exit(&(vh)->vh_client_mutex)
 302 #ifdef  DEBUG
 303 #define MDI_VHCI_CLIENT_LOCKED(vh)      MUTEX_HELD(&(vh)->vh_client_mutex)
 304 #endif  /* DEBUG */
 305 
 306 
 307 /*
 308  * GUID Hash definitions
 309  *
 310  * Since all the mpxio managed devices for a given class are enumerated under
 311  * the single vHCI instance for that class, sequentially walking through the
 312  * client device link to find a client would be prohibitively slow.
 313  */
 314 
 315 #define CLIENT_HASH_TABLE_SIZE  (32)    /* GUID hash */
 316 
 317 /*
 318  * Client hash table structure
 319  */
 320 struct client_hash {
 321         struct mdi_client       *ct_hash_head;  /* Client hash head     */
 322         int                     ct_hash_count;  /* Client hash count    */
 323 };
 324 
 325 
 326 /*
 327  * pHCI Drivers:
 328  *
 329  * Physical HBA drivers provide transport services for mpxio-managed devices.
 330  * As each pHCI instance is attached, it must register itself with the mpxio
 331  * framework using mdi_phci_register().  When the pHCI is detached it must
 332  * similarly call mdi_phci_unregister().
 333  *
 334  * The framework maintains a list of registered pHCI device instances for each
 335  * vHCI.  This list involves (vh_phci_count, vh_phci_head, vh_phci_tail) and
 336  * (ph_next, ph_prev, ph_vhci) and is protected by vh_phci_mutex.
 337  *
 338  * Locking order:
 339  *
 340  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))             XXX
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex, devinfo_tree_lock))		XXX
 342  */
 343 typedef struct mdi_phci {
 344         /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */
 345         struct mdi_phci         *ph_next;       /* next pHCI link       */
 346         struct mdi_phci         *ph_prev;       /* prev pHCI link       */
 347         dev_info_t              *ph_dip;        /* pHCI devi handle     */
 348         struct mdi_vhci         *ph_vhci;       /* pHCI back ref. to vHCI */
 349 
 350         /* protected by MDI_PHCI_LOCK ph_mutex... */
 351         kmutex_t                ph_mutex;       /* per-pHCI mutex       */
 352         int                     ph_path_count;  /* pi count             */
 353         mdi_pathinfo_t          *ph_path_head;  /* pi list head         */
 354         mdi_pathinfo_t          *ph_path_tail;  /* pi list tail         */
 355         int                     ph_flags;       /* pHCI operation flags */
 356         int                     ph_unstable;    /* Paths in transient state */
 357         kcondvar_t              ph_unstable_cv; /* Paths in transient state */
 358 
 359         /* protected by mdi_phci_[gs]et_vhci_private caller... */
 360         void                    *ph_vprivate;   /* vHCI driver private  */
 361 } mdi_phci_t;
 362 
 363 /*
 364  * A pHCI device is 'unstable' while one or more paths are in a transitional
 365  * state.  Hotplugging is prevented during this state.
 366  */
 367 #define MDI_PHCI_UNSTABLE(ph)           (ph)->ph_unstable++;
 368 #define MDI_PHCI_STABLE(ph) { \
 369         (ph)->ph_unstable--; \
 370         if ((ph)->ph_unstable == 0) { \
 371                 cv_broadcast(&(ph)->ph_unstable_cv); \
 372         } \
 373 }
 374 
 375 /*
 376  * per-pHCI lock macros
 377  */
 378 #define MDI_PHCI_LOCK(ph)               mutex_enter(&(ph)->ph_mutex)
 379 #define MDI_PHCI_TRYLOCK(ph)            mutex_tryenter(&(ph)->ph_mutex)
 380 #define MDI_PHCI_UNLOCK(ph)             mutex_exit(&(ph)->ph_mutex)
 381 #ifdef  DEBUG
 382 #define MDI_PHCI_LOCKED(vh)             MUTEX_HELD(&(ph)->ph_mutex)
 383 #endif  /* DEBUG */
 384 
 385 /*
 386  * pHCI state definitions and macros to track the pHCI driver instance state
 387  */
 388 #define MDI_PHCI_FLAGS_OFFLINE          0x1     /* pHCI is offline */
 389 #define MDI_PHCI_FLAGS_SUSPEND          0x2     /* pHCI is suspended */
 390 #define MDI_PHCI_FLAGS_POWER_DOWN       0x4     /* pHCI is power down */
 391 #define MDI_PHCI_FLAGS_DETACH           0x8     /* pHCI is detached */
 392 #define MDI_PHCI_FLAGS_USER_DISABLE     0x10    /* pHCI is disabled,user */
 393 #define MDI_PHCI_FLAGS_D_DISABLE        0x20    /* pHCI is disabled,driver */
 394 #define MDI_PHCI_FLAGS_D_DISABLE_TRANS  0x40    /* pHCI is disabled,transient */
 395 #define MDI_PHCI_FLAGS_POWER_TRANSITION 0x80    /* pHCI is power transition */
 396 
 397 #define MDI_PHCI_DISABLE_MASK                                           \
 398             (MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE |   \
 399             MDI_PHCI_FLAGS_D_DISABLE_TRANS)
 400 
 401 #define MDI_PHCI_IS_READY(ph)                                           \
 402             (((ph)->ph_flags & MDI_PHCI_DISABLE_MASK) == 0)
 403 
 404 #define MDI_PHCI_SET_OFFLINE(ph)                                        {\
 405             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 406             (ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE;                        }
 407 #define MDI_PHCI_SET_ONLINE(ph)                                         {\
 408             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 409             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE;                   }
 410 #define MDI_PHCI_IS_OFFLINE(ph)                                         \
 411             ((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE)
 412 
 413 #define MDI_PHCI_SET_SUSPEND(ph)                                        {\
 414             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 415             (ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND;                        }
 416 #define MDI_PHCI_SET_RESUME(ph)                                         {\
 417             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 418             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_SUSPEND;                   }
 419 #define MDI_PHCI_IS_SUSPENDED(ph)                                       \
 420             ((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND)
 421 
 422 #define MDI_PHCI_SET_DETACH(ph)                                         {\
 423             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 424             (ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH;                 }
 425 #define MDI_PHCI_SET_ATTACH(ph)                                         {\
 426             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 427             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH;                    }
 428 
 429 #define MDI_PHCI_SET_POWER_DOWN(ph)                                     {\
 430             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 431             (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN;             }
 432 #define MDI_PHCI_SET_POWER_UP(ph)                                       {\
 433             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 434             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN;                }
 435 #define MDI_PHCI_IS_POWERED_DOWN(ph)                                    \
 436             ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN)
 437 
 438 #define MDI_PHCI_SET_USER_ENABLE(ph)                                    {\
 439             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 440             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE;              }
 441 #define MDI_PHCI_SET_USER_DISABLE(ph)                                   {\
 442             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 443             (ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE;           }
 444 #define MDI_PHCI_IS_USER_DISABLED(ph)                                   \
 445             ((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE)
 446 
 447 #define MDI_PHCI_SET_DRV_ENABLE(ph)                                     {\
 448             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 449             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE;         }
 450 #define MDI_PHCI_SET_DRV_DISABLE(ph)                                    {\
 451             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 452             (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE;                      }
 453 #define MDI_PHCI_IS_DRV_DISABLED(ph)                                    \
 454             ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE)
 455 
 456 #define MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph)                           {\
 457             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 458             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS;           }
 459 #define MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph)                          {\
 460             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 461             (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS;                }
 462 #define MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)                          \
 463             ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS)
 464 
 465 #define MDI_PHCI_SET_POWER_TRANSITION(ph)                               {\
 466             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 467             (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION;               }
 468 #define MDI_PHCI_CLEAR_POWER_TRANSITION(ph)                             {\
 469             ASSERT(MDI_PHCI_LOCKED(ph));                                \
 470             (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION;          }
 471 #define MDI_PHCI_IS_POWER_TRANSITION(ph)                                \
 472             ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION)
 473 
 474 /*
 475  * mpxio Managed Clients:
 476  *
 477  * This framework creates a struct mdi_client for every client device created
 478  * by the framework as a result of self-enumeration of target devices by the
 479  * registered pHCI devices.  This structure is bound to client device dev_info
 480  * node at the time of client device allocation (ndi_devi_alloc(9e)). This
 481  * structure is unbound from the dev_info node when mpxio framework removes a
 482  * client device node from the system.
 483  *
 484  * This structure is created when a first path is enumerated and removed when
 485  * last path is de-enumerated from the system.
 486  *
 487  * Multipath client devices are instantiated as children of corresponding vHCI
 488  * driver instance. Each client device is uniquely identified by a GUID
 489  * provided by target device itself.  The parent vHCI device also maintains a
 490  * hashed list of client devices, protected by vh_client_mutex.
 491  *
 492  * Typically pHCI devices self-enumerate their child devices using taskq,
 493  * resulting in multiple paths to the same client device to be enumerated by
 494  * competing threads.
 495  *
 * Currently this framework supports three kinds of load-balancing policy
 * configurable through the vHCI driver configuration files.
 *
 * NONE         - Legacy AP mode
 * Round Robin  - Balance the pHCI load in a Round Robin fashion.
 * LBA          - Logical Block Addressing (see LOAD_BALANCE_LBA).
 501  *
 502  * This framework identifies the client device in three distinct states:
 503  *
 504  * OPTIMAL      - Client device has at least one redundant path.
 505  * DEGRADED     - No redundant paths (critical).  Failure in the current active
 506  *                path would result in data access failures.
 507  * FAILED       - No paths are available to access this device.
 508  *
 509  * Locking order:
 510  *
 511  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))                   XXX
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex, devinfo_tree_lock))		XXX
 513  */
 514 typedef struct mdi_client {
 515         /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
 516         struct mdi_client       *ct_hnext;      /* next client          */
 517         struct mdi_client       *ct_hprev;      /* prev client          */
 518         dev_info_t              *ct_dip;        /* client devi handle   */
 519         struct mdi_vhci         *ct_vhci;       /* vHCI back ref        */
 520         char                    *ct_drvname;    /* client driver name   */
 521         char                    *ct_guid;       /* client guid          */
 522         client_lb_t             ct_lb;          /* load balancing scheme */
 523         client_lb_args_t        *ct_lb_args;    /* load balancing args */
 524 
 525 
 526         /* protected by MDI_CLIENT_LOCK ct_mutex... */
 527         kmutex_t                ct_mutex;       /* per-client mutex     */
 528         int                     ct_path_count;  /* multi path count     */
 529         mdi_pathinfo_t          *ct_path_head;  /* multi path list head */
 530         mdi_pathinfo_t          *ct_path_tail;  /* multi path list tail */
 531         mdi_pathinfo_t          *ct_path_last;  /* last path used for i/o */
 532         int                     ct_state;       /* state information    */
 533         int                     ct_flags;       /* Driver op. flags     */
 534         int                     ct_failover_flags;      /* Failover args */
 535         int                     ct_failover_status;     /* last fo status */
 536         kcondvar_t              ct_failover_cv; /* Failover status cv   */
 537         int                     ct_unstable;    /* Paths in transient state */
 538         kcondvar_t              ct_unstable_cv; /* Paths in transient state */
 539 
 540         int                     ct_power_cnt;   /* Hold count on parent power */
 541         kcondvar_t              ct_powerchange_cv;
 542                                         /* Paths in power transient state */
 543         short                   ct_powercnt_config;
 544                                         /* held in pre/post config */
 545         short                   ct_powercnt_unconfig;
 546                                         /* held in pre/post unconfig */
 547         int                     ct_powercnt_reset;
 548                                         /* ct_power_cnt was reset */
 549 
 550         void                    *ct_cprivate;   /* client driver private */
 551         void                    *ct_vprivate;   /* vHCI driver private  */
 552 } mdi_client_t;
 553 
 554 /*
 555  * per-Client device locking definitions
 556  */
 557 #define MDI_CLIENT_LOCK(ct)             mutex_enter(&(ct)->ct_mutex)
 558 #define MDI_CLIENT_TRYLOCK(ct)          mutex_tryenter(&(ct)->ct_mutex)
 559 #define MDI_CLIENT_UNLOCK(ct)           mutex_exit(&(ct)->ct_mutex)
 560 #ifdef  DEBUG
 561 #define MDI_CLIENT_LOCKED(ct)           MUTEX_HELD(&(ct)->ct_mutex)
 562 #endif  /* DEBUG */
 563 
 564 /*
 * A Client device is 'unstable' while one or more paths are in a transitional
 * state.  We do not allow failover to take place while paths are in a
 * transient state.  Similarly, we do not allow state transitions while a
 * client device failover is in progress.
 569  */
 570 #define MDI_CLIENT_UNSTABLE(ct)         (ct)->ct_unstable++;
 571 #define MDI_CLIENT_STABLE(ct) { \
 572         (ct)->ct_unstable--; \
 573         if ((ct)->ct_unstable == 0) { \
 574                 cv_broadcast(&(ct)->ct_unstable_cv); \
 575         } \
 576 }
 577 
 578 /*
 579  * Client driver instance state definitions:
 580  */
 581 #define MDI_CLIENT_FLAGS_OFFLINE                0x00000001
 582 #define MDI_CLIENT_FLAGS_SUSPEND                0x00000002
 583 #define MDI_CLIENT_FLAGS_POWER_DOWN             0x00000004
 584 #define MDI_CLIENT_FLAGS_DETACH                 0x00000008
 585 #define MDI_CLIENT_FLAGS_FAILOVER               0x00000010
 586 #define MDI_CLIENT_FLAGS_REPORT_DEV             0x00000020
 587 #define MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS  0x00000040
 588 #define MDI_CLIENT_FLAGS_ASYNC_FREE             0x00000080
 589 #define MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED      0x00000100
 590 #define MDI_CLIENT_FLAGS_POWER_TRANSITION       0x00000200
 591 #define MDI_CLIENT_FLAGS_NO_EVENT               0x10000000
 592 
 593 #define MDI_CLIENT_SET_OFFLINE(ct)                                      {\
 594             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 595             (ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE;                      }
 596 #define MDI_CLIENT_SET_ONLINE(ct)                                       {\
 597             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 598             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE;         }
 599 #define MDI_CLIENT_IS_OFFLINE(ct) \
 600             ((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE)
 601 
 602 #define MDI_CLIENT_SET_SUSPEND(ct)                                      {\
 603             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 604             (ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND;                      }
 605 #define MDI_CLIENT_SET_RESUME(ct)                                       {\
 606             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 607             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND;         }
 608 #define MDI_CLIENT_IS_SUSPENDED(ct) \
 609             ((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND)
 610 
     /*
      * Client POWER_DOWN flag.  MDI_CLIENT_SET_POWER_DOWN() sets and
      * MDI_CLIENT_SET_POWER_UP() clears MDI_CLIENT_FLAGS_POWER_DOWN in ct_flags,
      * each after asserting MDI_CLIENT_LOCKED(ct).
      * MDI_CLIENT_IS_POWERED_DOWN() yields the masked bit (nonzero when set).
      * The two setters expand to a brace block, not an expression.
      */
 611 #define MDI_CLIENT_SET_POWER_DOWN(ct)                                   {\
 612             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 613             (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN;           }
 614 #define MDI_CLIENT_SET_POWER_UP(ct)                                     {\
 615             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 616             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN;              }
 617 #define MDI_CLIENT_IS_POWERED_DOWN(ct) \
 618             ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN)
 619 
     /*
      * Client POWER_TRANSITION flag.  The SET/CLEAR macros set and clear
      * MDI_CLIENT_FLAGS_POWER_TRANSITION in ct_flags, each after asserting
      * MDI_CLIENT_LOCKED(ct).  MDI_CLIENT_IS_POWER_TRANSITION() yields the
      * masked bit (nonzero when set).  SET and CLEAR expand to a brace block,
      * not an expression.
      */
 620 #define MDI_CLIENT_SET_POWER_TRANSITION(ct)                             {\
 621             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 622             (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_TRANSITION;     }
 623 #define MDI_CLIENT_CLEAR_POWER_TRANSITION(ct)                           {\
 624             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 625             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION;        }
 626 #define MDI_CLIENT_IS_POWER_TRANSITION(ct) \
 627             ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION)
 628 
     /*
      * Client DETACH flag.  MDI_CLIENT_SET_DETACH() sets and
      * MDI_CLIENT_SET_ATTACH() clears MDI_CLIENT_FLAGS_DETACH in ct_flags, each
      * after asserting MDI_CLIENT_LOCKED(ct).  MDI_CLIENT_IS_DETACHED() yields
      * the masked bit (nonzero when set).  The two setters expand to a brace
      * block, not an expression.
      */
 629 #define MDI_CLIENT_SET_DETACH(ct)                                       {\
 630             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 631             (ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH;                       }
 632 #define MDI_CLIENT_SET_ATTACH(ct)                                       {\
 633             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 634             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH;                  }
 635 #define MDI_CLIENT_IS_DETACHED(ct) \
 636             ((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH)
 637 
     /*
      * Client FAILOVER flag.  The SET/CLEAR macros set and clear
      * MDI_CLIENT_FLAGS_FAILOVER in ct_flags, each after asserting
      * MDI_CLIENT_LOCKED(ct).  MDI_CLIENT_IS_FAILOVER_IN_PROGRESS() yields the
      * masked bit (nonzero when set).  SET and CLEAR expand to a brace block,
      * not an expression.
      */
 638 #define MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct)                         {\
 639             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 640             (ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER;             }
 641 #define MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct)                       {\
 642             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 643             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER;                }
 644 #define MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \
 645             ((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER)
 646 
     /*
      * Client REPORT_DEV flag.  The SET/CLEAR macros set and clear
      * MDI_CLIENT_FLAGS_REPORT_DEV in ct_flags, each after asserting
      * MDI_CLIENT_LOCKED(ct).  MDI_CLIENT_IS_REPORT_DEV_NEEDED() yields the
      * masked bit (nonzero when set).  SET and CLEAR expand to a brace block,
      * not an expression.
      */
 647 #define MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct)                            {\
 648             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 649             (ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV;           }
 650 #define MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct)                          {\
 651             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 652             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV;              }
 653 #define MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \
 654             ((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV)
 655 
     /*
      * Client PATH_FREE_IN_PROGRESS flag.  The SET/CLEAR macros set and clear
      * MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS in ct_flags, each after asserting
      * MDI_CLIENT_LOCKED(ct).  MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS() yields the
      * masked bit (nonzero when set).  SET and CLEAR expand to a brace block,
      * not an expression.
      */
 656 #define MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct)                        {\
 657             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 658             (ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS;        }
 659 #define MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct)                      {\
 660             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 661             (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS;   }
 662 #define MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \
 663             ((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)
 664 
     /*
      * Client DEV_NOT_SUPPORTED flag.  MDI_CLIENT_SET_DEV_NOT_SUPPORTED() sets
      * MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED in ct_flags after asserting
      * MDI_CLIENT_LOCKED(ct); it expands to a brace block, not an expression.
      * MDI_CLIENT_IS_DEV_NOT_SUPPORTED() yields the masked bit (nonzero when
      * set).  No macro that clears this flag is defined alongside these two.
      */
 665 #define MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct)                            {\
 666             ASSERT(MDI_CLIENT_LOCKED(ct));                              \
 667             (ct)->ct_flags |= MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED;    }
 668 #define MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \
 669             ((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)
 670 
 671 /*
 672  * Client operating states, stored in ct_state.
      *
      * MDI_CLIENT_STATE() reads ct_state, MDI_CLIENT_SET_STATE() assigns it and
      * MDI_CLIENT_IS_FAILED() compares it with MDI_CLIENT_STATE_FAILED.  Unlike
      * the ct_flags setters, none of these asserts MDI_CLIENT_LOCKED(ct).
      * NOTE: MDI_CLIENT_SET_STATE() expands its 'state' argument without
      * parentheses.
 673  */
 674 #define MDI_CLIENT_STATE_OPTIMAL        1
 675 #define MDI_CLIENT_STATE_DEGRADED       2
 676 #define MDI_CLIENT_STATE_FAILED         3
 677 
 678 #define MDI_CLIENT_STATE(ct) ((ct)->ct_state)
 679 #define MDI_CLIENT_SET_STATE(ct, state) ((ct)->ct_state = state)
 680 
 681 #define MDI_CLIENT_IS_FAILED(ct) \
 682             ((ct)->ct_state == MDI_CLIENT_STATE_FAILED)
 683 
 684 /*
 685  * mdi_pathinfo nodes:
 686  *
 687  * From this framework's perspective, a 'path' is a tuple consisting of a
 688  * client or end device, a host controller which provides device
 689  * identification and transport services (pHCI), and bus specific unit
 690  * addressing information.  A path may be decorated with properties which
 691  * describe the capabilities of the path; such properties are analogous to
 692  * device node and minor node properties.
 693  *
 694  * The framework maintains a linked list of mdi_pathinfo nodes created by
 695  * every pHCI driver instance via the pi_phci_link linkage; this is used (for
 696  * example) to make sure that all relevant pathinfo nodes are freed before the
 697  * pHCI is unregistered.
 698  *
 699  * Locking order:
 700  *
 701  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))         XXX
 702  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))       XXX
 703  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))           XXX
 704  * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex))          XXX
 705  *
 706  * mdi_pathinfo node structure definition
 707  */
 708 struct mdi_pathinfo {
 709         /* protected by MDI_PHCI_LOCK ph_mutex... */
 710         struct mdi_pathinfo     *pi_phci_link;   /* next path in phci list */
 711         mdi_phci_t              *pi_phci;       /* pHCI dev_info node   */
 712 
 713         /* protected by MDI_CLIENT_LOCK ct_mutex... */
 714         struct mdi_pathinfo     *pi_client_link; /* next path in client list */
 715         mdi_client_t            *pi_client;     /* client               */
 716 
 717         /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
 718         char                    *pi_addr;       /* path unit address    */
 719         int                     pi_path_instance; /* path instance */
 720 
 721         /* protected by MDI_PI_LOCK pi_mutex... */
 722         kmutex_t                pi_mutex;       /* per path mutex       */
 723         mdi_pathinfo_state_t    pi_state;       /* path state + ext bits */
 724         mdi_pathinfo_state_t    pi_old_state;   /* previous path state  */
 725         kcondvar_t              pi_state_cv;    /* path state condvar   */
 726         nvlist_t                *pi_prop;       /* Properties           */
 727         void                    *pi_cprivate;   /* client private info  */
 728         void                    *pi_pprivate;   /* phci private info    */
 729         int                     pi_ref_cnt;     /* pi reference count   */
 730         kcondvar_t              pi_ref_cv;      /* condition variable   */
 731         struct mdi_pi_kstats    *pi_kstats;     /* aggregate kstats */
 732         int                     pi_pm_held;     /* phci's kidsup incremented */
 733         int                     pi_preferred;   /* Preferred path       */
 734         void                    *pi_vprivate;   /* vhci private info    */
 735         uint_t                  pi_flags;       /* MDI_PATHINFO_FLAGS_* */
 736 };
 737 
 738 /*
 739  * pathinfo statistics:
 740  *
 741  * The mpxio architecture allows for multiple pathinfo nodes for each
 742  * client-pHCI combination.  For statistics purposes, these statistics are
 743  * aggregated into a single client-pHCI set of kstats.
      *
      * A path refers to its aggregate set through mdi_pathinfo.pi_kstats.
 744  */
 745 struct mdi_pi_kstats {
 746         int     pi_kstat_ref;           /* # paths aggregated, also a ref cnt */
 747         kstat_t *pi_kstat_iostats;      /* mdi:iopath statistic set */
 748         kstat_t *pi_kstat_errstats;     /* ks_data is a struct pi_errs */
 749 };
 750 
 751 /*
 752  * pathinfo error kstat: one named counter per error class.  MDI_PI_ERRSTAT()
      * increments the value.ui32 member of the selected counter.
 753  */
 754 struct pi_errs {
 755         struct kstat_named pi_softerrs;         /* "Soft" Error */
 756         struct kstat_named pi_harderrs;         /* "Hard" Error */
 757         struct kstat_named pi_transerrs;        /* Transport Errors */
 758         struct kstat_named pi_icnt_busy;        /* Interconnect Busy */
 759         struct kstat_named pi_icnt_errors;      /* Interconnect Errors */
 760         struct kstat_named pi_phci_rsrc;        /* pHCI No Resources */
 761         struct kstat_named pi_phci_localerr;    /* pHCI Local Errors */
 762         struct kstat_named pi_phci_invstate;    /* pHCI Invalid State */
 763         struct kstat_named pi_failedfrom;       /* Failover: Failed From */
 764         struct kstat_named pi_failedto;         /* Failover: Failed To */
 765 };
 766 
 767 /*
 768  * Increment an error counter for a path.
      *
      * 'x' must name a member of struct pi_errs (the MDI_PI_* error codes expand
      * to such names).  Nothing is done when the path's pi_kstats pointer is
      * NULL; otherwise pi_kstat_errstats->ks_data is used as a struct pi_errs
      * and the chosen counter's value.ui32 is incremented.  pi_kstat_errstats
      * itself is not checked for NULL, and 'pip' is evaluated more than once.
      * The macro expands to a brace block, not an expression.
 769  */
 770 #define MDI_PI_ERRSTAT(pip, x) { \
 771         if (MDI_PI((pip))->pi_kstats != NULL) { \
 772                 struct pi_errs *pep; \
 773                 pep = MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \
 774                 pep->x.value.ui32++; \
 775         } \
 776 }
 777 
 778 /*
 779  * Error codes which can be passed to MDI_PI_ERRSTAT as its 'x' argument;
      * each one expands to the name of a struct pi_errs member.
 780  */
 781 #define MDI_PI_SOFTERR  pi_softerrs
 782 #define MDI_PI_HARDERR  pi_harderrs
 783 #define MDI_PI_TRANSERR pi_transerrs
 784 #define MDI_PI_ICNTBUSY pi_icnt_busy
 785 #define MDI_PI_ICNTERR  pi_icnt_errors
 786 #define MDI_PI_PHCIRSRC pi_phci_rsrc
 787 #define MDI_PI_PHCILOCL pi_phci_localerr
 788 #define MDI_PI_PHCIINVS pi_phci_invstate
 789 #define MDI_PI_FAILFROM pi_failedfrom
 790 #define MDI_PI_FAILTO   pi_failedto
 791 
     /* Cast the argument to a struct mdi_pathinfo pointer. */
 792 #define MDI_PI(type)                    ((struct mdi_pathinfo *)(type))
 793 
     /*
      * Wrappers around the per-path mutex, pi_mutex.  MDI_PI_LOCKED() is only
      * defined when DEBUG is defined; in this header it is used solely inside
      * ASSERT().
      */
 794 #define MDI_PI_LOCK(pip)                mutex_enter(&MDI_PI(pip)->pi_mutex)
 795 #define MDI_PI_TRYLOCK(pip)             mutex_tryenter(&MDI_PI(pip)->pi_mutex)
 796 #define MDI_PI_UNLOCK(pip)              mutex_exit(&MDI_PI(pip)->pi_mutex)
 797 #ifdef  DEBUG
 798 #define MDI_PI_LOCKED(pip)              MUTEX_HELD(&MDI_PI(pip)->pi_mutex)
 799 #endif  /* DEBUG */
 800 
     /*
      * Pre-increment / pre-decrement pi_ref_cnt; the expression yields the new
      * count.  Neither macro takes or asserts any lock itself.
      */
 801 #define MDI_PI_HOLD(pip)                (++MDI_PI(pip)->pi_ref_cnt)
 802 #define MDI_PI_RELE(pip)                (--MDI_PI(pip)->pi_ref_cnt)
 803 
     /*
      * Bit flags.  NOTE(review): nothing in this part of the header consumes
      * these values; presumably they are passed to the path enable/disable and
      * state change notification code -- confirm against the callers.
      */
 804 #define MDI_EXT_STATE_CHANGE            0x10000000
 805 
 806 
 807 #define MDI_DISABLE_OP                  0x1
 808 #define MDI_ENABLE_OP                   0x2
 809 #define MDI_BEFORE_STATE_CHANGE         0x4
 810 #define MDI_AFTER_STATE_CHANGE          0x8
 811 #define MDI_SYNC_FLAG                   0x10
 812 
     /*
      * pi_state and pi_old_state each combine a base state (the bits covered by
      * MDI_PATHINFO_STATE_MASK) with extended state bits (the bits covered by
      * MDI_PATHINFO_EXT_STATE_MASK).  These accessors extract one part or the
      * other; none of them asserts MDI_PI_LOCKED(pip).
      */
 813 #define MDI_PI_STATE(pip)                                               \
 814         (MDI_PI((pip))->pi_state & MDI_PATHINFO_STATE_MASK)
 815 #define MDI_PI_OLD_STATE(pip)                                           \
 816         (MDI_PI((pip))->pi_old_state & MDI_PATHINFO_STATE_MASK)
 817 
 818 #define MDI_PI_EXT_STATE(pip)                                           \
 819         (MDI_PI((pip))->pi_state & MDI_PATHINFO_EXT_STATE_MASK)
 820 #define MDI_PI_OLD_EXT_STATE(pip)                                       \
 821         (MDI_PI((pip))->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK)
 822 
     /*
      * TRANSIENT bit of pi_state.  MDI_PI_SET_TRANSIENT() sets and
      * MDI_PI_CLEAR_TRANSIENT() clears MDI_PATHINFO_STATE_TRANSIENT, each after
      * asserting MDI_PI_LOCKED(pip); MDI_PI_IS_TRANSIENT() yields the masked
      * bit (nonzero when set).  SET and CLEAR expand to a brace block, not an
      * expression.
      */
 823 #define MDI_PI_SET_TRANSIENT(pip)                                       {\
 824         ASSERT(MDI_PI_LOCKED(pip));                                     \
 825         MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT;               }
 826 #define MDI_PI_CLEAR_TRANSIENT(pip)                                     {\
 827         ASSERT(MDI_PI_LOCKED(pip));                                     \
 828         MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT;          }
 829 #define MDI_PI_IS_TRANSIENT(pip) \
 830         (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT)
 831 
     /*
      * Path disable bits kept in pi_state: USER_DISABLE, DRV_DISABLE and
      * DRV_DISABLE_TRANSIENT.  The MDI_PI_SET_..._DISABLE macros set the
      * matching bit and the MDI_PI_SET_..._ENABLE macros clear it, each after
      * asserting MDI_PI_LOCKED(pip); they expand to a brace block, not an
      * expression.  The MDI_PI_IS_... macros yield the masked bit, and
      * MDI_PI_IS_DISABLE() is true when any of the three bits is set.
      */
 832 #define MDI_PI_SET_USER_DISABLE(pip)                                    {\
 833         ASSERT(MDI_PI_LOCKED(pip));                                     \
 834         MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_USER_DISABLE;    }
 835 #define MDI_PI_SET_DRV_DISABLE(pip)                                     {\
 836         ASSERT(MDI_PI_LOCKED(pip));                                     \
 837         MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE;     }
 838 #define MDI_PI_SET_DRV_DISABLE_TRANS(pip)                               {\
 839         ASSERT(MDI_PI_LOCKED(pip));                                     \
 840         MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; }
 841 
 842 #define MDI_PI_SET_USER_ENABLE(pip)                                     {\
 843         ASSERT(MDI_PI_LOCKED(pip));                                     \
 844         MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_USER_DISABLE;       }
 845 #define MDI_PI_SET_DRV_ENABLE(pip)                                      {\
 846         ASSERT(MDI_PI_LOCKED(pip));                                     \
 847         MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE;        }
 848 #define MDI_PI_SET_DRV_ENABLE_TRANS(pip)                                {\
 849         ASSERT(MDI_PI_LOCKED(pip));                                     \
 850         MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; }
 851 
 852 #define MDI_PI_IS_USER_DISABLE(pip)                                     \
 853         (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE)
 854 #define MDI_PI_IS_DRV_DISABLE(pip)                                      \
 855         (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE)
 856 #define MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip)                            \
 857         (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)
 858 
 859 #define MDI_PI_IS_DISABLE(pip)                                          \
 860         (MDI_PI_IS_USER_DISABLE(pip) ||                                 \
 861         MDI_PI_IS_DRV_DISABLE(pip) ||                                   \
 862         MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip))
 863 
     /*
      * INIT state.  MDI_PI_IS_INIT() compares only the MDI_PATHINFO_STATE_MASK
      * bits of pi_state with INIT; MDI_PI_IS_INITING() compares everything
      * except the extended state bits with INIT | TRANSIENT.
      *
      * MDI_PI_SET_INIT() asserts MDI_PI_LOCKED(pip) and then assigns pi_state
      * outright, so unlike the ONLINE/OFFLINE/STANDBY/FAULT setters it does not
      * carry the extended state bits over and does not touch pi_old_state.  It
      * expands to a brace block, not an expression.
      */
 864 #define MDI_PI_IS_INIT(pip)                                             \
 865         ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==            \
 866                 MDI_PATHINFO_STATE_INIT)
 867 
 868 #define MDI_PI_IS_INITING(pip)                                          \
 869         ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==       \
 870                 (MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT))
 871 
 872 #define MDI_PI_SET_INIT(pip)                                            {\
 873         ASSERT(MDI_PI_LOCKED(pip));                                     \
 874         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT;             }
 875 
     /*
      * ONLINE transitions.  MDI_PI_SET_ONLINING() saves the current base state
      * (MDI_PI_STATE) in pi_old_state and sets pi_state to ONLINE | TRANSIENT;
      * MDI_PI_SET_ONLINE() sets pi_state to ONLINE and leaves pi_old_state
      * alone.  Both assert MDI_PI_LOCKED(pip), carry the extended state bits
      * over unchanged, and expand to a brace block that declares a local named
      * ext_state.  MDI_PI_IS_ONLINING() ignores the extended state bits;
      * MDI_PI_IS_ONLINE() compares only the MDI_PATHINFO_STATE_MASK bits.
      */
 876 #define MDI_PI_SET_ONLINING(pip)                                        {\
 877         uint32_t        ext_state;                                      \
 878         ASSERT(MDI_PI_LOCKED(pip));                                     \
 879         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 880         MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);                       \
 881         MDI_PI(pip)->pi_state =                                              \
 882         (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT);     \
 883         MDI_PI(pip)->pi_state |= ext_state;                          }
 884 
 885 #define MDI_PI_IS_ONLINING(pip)                                         \
 886         ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==       \
 887         (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT))
 888 
 889 #define MDI_PI_SET_ONLINE(pip)                                          {\
 890         uint32_t        ext_state;                                      \
 891         ASSERT(MDI_PI_LOCKED(pip));                                     \
 892         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 893         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE;           \
 894         MDI_PI(pip)->pi_state |= ext_state;                          }
 895 
 896 #define MDI_PI_IS_ONLINE(pip)                                           \
 897         ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==            \
 898         MDI_PATHINFO_STATE_ONLINE)
 899 
     /*
      * OFFLINE transitions.  MDI_PI_SET_OFFLINING() saves the current base
      * state (MDI_PI_STATE) in pi_old_state and sets pi_state to
      * OFFLINE | TRANSIENT; MDI_PI_SET_OFFLINE() sets pi_state to OFFLINE and
      * leaves pi_old_state alone.  Both assert MDI_PI_LOCKED(pip), carry the
      * extended state bits over unchanged, and expand to a brace block that
      * declares a local named ext_state.  MDI_PI_IS_OFFLINING() ignores the
      * extended state bits; MDI_PI_IS_OFFLINE() compares only the
      * MDI_PATHINFO_STATE_MASK bits.
      */
 900 #define MDI_PI_SET_OFFLINING(pip)                                       {\
 901         uint32_t        ext_state;                                      \
 902         ASSERT(MDI_PI_LOCKED(pip));                                     \
 903         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 904         MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);                       \
 905         MDI_PI(pip)->pi_state =                                              \
 906         (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT);    \
 907         MDI_PI(pip)->pi_state |= ext_state;                          }
 908 
 909 #define MDI_PI_IS_OFFLINING(pip)                                        \
 910         ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==       \
 911         (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT))
 912 
 913 #define MDI_PI_SET_OFFLINE(pip)                                         {\
 914         uint32_t        ext_state;                                      \
 915         ASSERT(MDI_PI_LOCKED(pip));                                     \
 916         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 917         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE;          \
 918         MDI_PI(pip)->pi_state |= ext_state;                          }
 919 
 920 #define MDI_PI_IS_OFFLINE(pip)                                          \
 921         ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==            \
 922         MDI_PATHINFO_STATE_OFFLINE)
 923 
     /*
      * STANDBY transitions.  MDI_PI_SET_STANDBYING() saves the current base
      * state (MDI_PI_STATE) in pi_old_state and sets pi_state to
      * STANDBY | TRANSIENT; MDI_PI_SET_STANDBY() sets pi_state to STANDBY and
      * leaves pi_old_state alone.  Both assert MDI_PI_LOCKED(pip), carry the
      * extended state bits over unchanged, and expand to a brace block that
      * declares a local named ext_state.  MDI_PI_IS_STANDBY() compares only the
      * MDI_PATHINFO_STATE_MASK bits.
      */
 924 #define MDI_PI_SET_STANDBYING(pip)                                      {\
 925         uint32_t        ext_state;                                      \
 926         ASSERT(MDI_PI_LOCKED(pip));                                     \
 927         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 928         MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);                       \
 929         MDI_PI(pip)->pi_state =                                              \
 930         (MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT);    \
 931         MDI_PI(pip)->pi_state |= ext_state;                          }
 932 
 933 #define MDI_PI_SET_STANDBY(pip)                                         {\
 934         uint32_t        ext_state;                                      \
 935         ASSERT(MDI_PI_LOCKED(pip));                                     \
 936         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 937         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY;          \
 938         MDI_PI(pip)->pi_state |= ext_state;                          }
 939 
 940 #define MDI_PI_IS_STANDBY(pip)                                          \
 941         ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==            \
 942         MDI_PATHINFO_STATE_STANDBY)
 943 
     /*
      * FAULT transitions.  MDI_PI_SET_FAULTING() saves the current base state
      * (MDI_PI_STATE) in pi_old_state and sets pi_state to FAULT | TRANSIENT;
      * MDI_PI_SET_FAULT() sets pi_state to FAULT and leaves pi_old_state
      * alone.  Both assert MDI_PI_LOCKED(pip), carry the extended state bits
      * over unchanged, and expand to a brace block that declares a local named
      * ext_state.  MDI_PI_IS_FAULT() compares only the MDI_PATHINFO_STATE_MASK
      * bits.
      */
 944 #define MDI_PI_SET_FAULTING(pip)                                        {\
 945         uint32_t        ext_state;                                      \
 946         ASSERT(MDI_PI_LOCKED(pip));                                     \
 947         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 948         MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);                       \
 949         MDI_PI(pip)->pi_state =                                              \
 950             (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT);  \
 951         MDI_PI(pip)->pi_state |= ext_state;                          }
 952 
 953 #define MDI_PI_SET_FAULT(pip)                                           {\
 954         uint32_t        ext_state;                                      \
 955         ASSERT(MDI_PI_LOCKED(pip));                                     \
 956         ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
 957         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT;            \
 958         MDI_PI(pip)->pi_state |= ext_state;                          }
 959 
 960 #define MDI_PI_IS_FAULT(pip)                                            \
 961         ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==            \
 962         MDI_PATHINFO_STATE_FAULT)
 963 
     /*
      * Nonzero when the path's pHCI (pi_phci) has MDI_PHCI_FLAGS_SUSPEND set in
      * ph_flags.  This tests the pHCI, not pi_state, and asserts no lock.
      */
 964 #define MDI_PI_IS_SUSPENDED(pip)                                        \
 965         ((MDI_PI(pip))->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND)
 966 
     /*
      * HIDDEN bit of pi_flags.  The SET/CLR macros set and clear
      * MDI_PATHINFO_FLAGS_HIDDEN after asserting MDI_PI_LOCKED(pip) and expand
      * to a brace block, not an expression.  MDI_PI_FLAGS_IS_HIDDEN() evaluates
      * to 1 when the flag is set and 0 otherwise.
      */
 967 #define MDI_PI_FLAGS_SET_HIDDEN(pip)                                    {\
 968         ASSERT(MDI_PI_LOCKED(pip));                                     \
 969         MDI_PI(pip)->pi_flags |= MDI_PATHINFO_FLAGS_HIDDEN;          }
 970 #define MDI_PI_FLAGS_CLR_HIDDEN(pip)                                    {\
 971         ASSERT(MDI_PI_LOCKED(pip));                                     \
 972         MDI_PI(pip)->pi_flags &= ~MDI_PATHINFO_FLAGS_HIDDEN;             }
 973 #define MDI_PI_FLAGS_IS_HIDDEN(pip)                                     \
 974         ((MDI_PI(pip)->pi_flags & MDI_PATHINFO_FLAGS_HIDDEN) ==          \
 975         MDI_PATHINFO_FLAGS_HIDDEN)
 976 
     /*
      * DEVICE_REMOVED bit of pi_flags.  The SET/CLR macros set and clear
      * MDI_PATHINFO_FLAGS_DEVICE_REMOVED after asserting MDI_PI_LOCKED(pip) and
      * expand to a brace block, not an expression.
      * MDI_PI_FLAGS_IS_DEVICE_REMOVED() evaluates to 1 when the flag is set and
      * 0 otherwise.
      */
 977 #define MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip)                            {\
 978         ASSERT(MDI_PI_LOCKED(pip));                                     \
 979         MDI_PI(pip)->pi_flags |= MDI_PATHINFO_FLAGS_DEVICE_REMOVED;  }
 980 #define MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip)                            {\
 981         ASSERT(MDI_PI_LOCKED(pip));                                     \
 982         MDI_PI(pip)->pi_flags &= ~MDI_PATHINFO_FLAGS_DEVICE_REMOVED;     }
 983 #define MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)                             \
 984         ((MDI_PI(pip)->pi_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED) ==  \
 985         MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
 986 
 987 /*
 988  * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures
 989  * hold the vhci to phci client mappings of the on-disk vhci busconfig cache.
 990  */
 991 
 992 /* phci structure of vhci cache; entries are chained through cphci_next */
 993 typedef struct mdi_vhcache_phci {
 994         char                    *cphci_path;    /* phci path name */
 995         uint32_t                cphci_id;       /* used when building nvlist */
 996         mdi_phci_t              *cphci_phci;    /* pointer to actual phci */
 997         struct mdi_vhcache_phci *cphci_next;    /* next in vhci phci list */
 998 } mdi_vhcache_phci_t;
 999 
1000 /* pathinfo structure of vhci cache; entries are chained through cpi_next */
1001 typedef struct mdi_vhcache_pathinfo {
1002         char                    *cpi_addr;      /* path address */
1003         mdi_vhcache_phci_t      *cpi_cphci;     /* phci the path belongs to */
1004         struct mdi_pathinfo     *cpi_pip;       /* ptr to actual pathinfo */
1005         uint32_t                cpi_flags;      /* MDI_CPI_HINT_* flags */
1006         struct mdi_vhcache_pathinfo *cpi_next;  /* next path for the client */
1007 } mdi_vhcache_pathinfo_t;
1008 
1009 /*
1010  * cpi_flags (mdi_vhcache_pathinfo_t)
1011  *
1012  * MDI_CPI_HINT_PATH_DOES_NOT_EXIST - set when configuration of the path has
1013  * failed.
1014  */
1015 #define MDI_CPI_HINT_PATH_DOES_NOT_EXIST        0x0001
1016 
1017 /* client structure of vhci cache; entries are chained through cct_next */
1018 typedef struct mdi_vhcache_client {
1019         char                    *cct_name_addr; /* client address */
1020         mdi_vhcache_pathinfo_t  *cct_cpi_head;  /* client's path list head */
1021         mdi_vhcache_pathinfo_t  *cct_cpi_tail;  /* client's path list tail */
1022         struct mdi_vhcache_client *cct_next;    /* next in vhci client list */
1023 } mdi_vhcache_client_t;
1024 
1025 /* vhci cache structure - one per vhci instance */
1026 typedef struct mdi_vhci_cache {
1027         mdi_vhcache_phci_t      *vhcache_phci_head;     /* phci list head */
1028         mdi_vhcache_phci_t      *vhcache_phci_tail;     /* phci list tail */
1029         mdi_vhcache_client_t    *vhcache_client_head;   /* client list head */
1030         mdi_vhcache_client_t    *vhcache_client_tail;   /* client list tail */
1031         mod_hash_t              *vhcache_client_hash;   /* client hash */
1032         int                     vhcache_flags;          /* MDI_VHCI_CACHE_* */
1033         int64_t                 vhcache_clean_time;     /* last clean time */
1034         krwlock_t               vhcache_lock;           /* cache lock */
1035 } mdi_vhci_cache_t;
1036 
1037 /* vhcache_flags (mdi_vhci_cache_t) */
1038 #define MDI_VHCI_CACHE_SETUP_DONE       0x0001  /* cache setup completed */
1039 
1040 /* vhci bus config structure - one per vhci instance */
1041 typedef struct mdi_vhci_config {
1042         char                    *vhc_vhcache_filename;  /* on-disk file name */
1043         mdi_vhci_cache_t        vhc_vhcache;            /* vhci cache */
1044         kmutex_t                vhc_lock;               /* vhci config lock */
1045         kcondvar_t              vhc_cv;
1046         int                     vhc_flags;              /* MDI_VHC_* flags */
1047 
1048         /* flush vhci cache when lbolt reaches vhc_flush_at_ticks */
1049         clock_t                 vhc_flush_at_ticks;
1050 
1051         /*
1052          * Head and tail of the client list whose paths are being configured
1053          * asynchronously. vhc_acc_count is the number of clients on this list.
1054          * vhc_acc_thrcount is the number of threads running to configure
1055          * the paths for these clients.
1056          */
1057         struct mdi_async_client_config *vhc_acc_list_head;
1058         struct mdi_async_client_config *vhc_acc_list_tail;
1059         int                     vhc_acc_count;
1060         int                     vhc_acc_thrcount;
1061 
1062         /* callback id - for flushing the cache during system shutdown */
1063         callb_id_t              vhc_cbid;
1064 
1065         /*
1066          * vhc_path_discovery_boot -    number of times path discovery will be
1067          *                              attempted during early boot.
1068          * vhc_path_discovery_postboot - number of times path discovery will be
1069          *                              attempted during late boot.
1070          * vhc_path_discovery_cutoff_time - time at which paths were last
1071          *                              discovered  + some timeout
1072          */
1073         int                     vhc_path_discovery_boot;
1074         int                     vhc_path_discovery_postboot;
1075         int64_t                 vhc_path_discovery_cutoff_time;
1076 } mdi_vhci_config_t;
1077 
1078 /* vhc_flags (mdi_vhci_config_t) */
1079 #define MDI_VHC_SINGLE_THREADED         0x0001  /* config single threaded */
1080 #define MDI_VHC_EXIT                    0x0002  /* exit all config activity */
1081 #define MDI_VHC_VHCACHE_DIRTY           0x0004  /* cache dirty */
1082 #define MDI_VHC_VHCACHE_FLUSH_THREAD    0x0008  /* cache flush thread running */
1083 #define MDI_VHC_VHCACHE_FLUSH_ERROR     0x0010  /* failed to flush cache */
1084 #define MDI_VHC_READONLY_FS             0x0020  /* filesys is readonly */
1085 
     /*
      * Node of a singly linked list of path strings, chained through
      * phys_path_next.  mdi_async_client_config_t keeps the head of such a list
      * in acc_phclient_path_list_head.  NOTE(review): phys_path is presumably a
      * physical device path name -- confirm against the code that builds it.
      */
1086 typedef struct mdi_phys_path {
1087         char                    *phys_path;
1088         struct mdi_phys_path    *phys_path_next;
1089 } mdi_phys_path_t;
1090 
1091 /*
1092  * Lookup tokens are used to cache the result of the vhci cache client lookup
1093  * operations (to reduce the number of real lookup operations).
      * mdi_async_client_config_t embeds one as acc_token.
1094  */
1095 typedef struct mdi_vhcache_lookup_token {
1096         mdi_vhcache_client_t    *lt_cct;                /* vhcache client */
1097         int64_t                 lt_cct_lookup_time;     /* last lookup time */
1098 } mdi_vhcache_lookup_token_t;
1099 
1100 /* asynchronous configuration of client paths; chained through acc_next */
1101 typedef struct mdi_async_client_config {
1102         char                    *acc_ct_name;   /* client name */
1103         char                    *acc_ct_addr;   /* client address */
1104         mdi_phys_path_t         *acc_phclient_path_list_head;   /* path head */
1105         mdi_vhcache_lookup_token_t acc_token;   /* lookup token */
1106         struct mdi_async_client_config *acc_next; /* next in vhci acc list */
1107 } mdi_async_client_config_t;
1108 
1109 /*
1110  * vHCI driver instance registration/unregistration
1111  *
1112  * mdi_vhci_register() is called by a vHCI driver to register itself as the
1113  * manager of devices from a particular 'class'.  This should be called from
1114  * attach(9E).
1115  *
1116  * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI
1117  * instance from the framework.
1118  */
1119 int             mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int);
1120 int             mdi_vhci_unregister(dev_info_t *, int);
1121 
1122 /*
1123  * pHCI utility functions
1124  */
1125 int             mdi_phci_get_path_count(dev_info_t *);
1126 dev_info_t      *mdi_phci_path2devinfo(dev_info_t *, caddr_t);
1127 
1128 
1129 /*
1130  * Path Selection Functions:
1131  *
1132  * mdi_select_path() is called by a vHCI driver to select the path to which an
1133  * I/O request should be routed.  The caller passes the 'buf' structure as
1134  * one of the parameters.  The mpxio framework uses the buf's contents to
1135  * maintain per path statistics (total I/O size / count pending).  If more
1136  * than one online path is available, the framework automatically selects
1137  * a suitable one.  If a failover operation is active for this client device
1138  * the call fails, returning MDI_BUSY.
1139  *
1140  * By default this function returns a suitable path in the 'online' state,
1141  * based on the current load balancing policy.  Currently we support
1142  * LOAD_BALANCE_NONE (Previously selected online path will continue to be
1143  * used as long as the path is usable) and LOAD_BALANCE_RR (Online paths
1144  * will be selected in a round robin fashion).  The load balancing scheme
1145  * can be configured in the vHCI driver's configuration file (driver.conf).
1146  *
1147  * vHCI drivers may override this default behavior by specifying appropriate
1148  * flags.  If start_pip is specified (non NULL), it is used as the routine's
1149  * starting point; it starts walking from there to find the next appropriate
1150  * path.
1151  *
1152  * The following values for 'flags' are currently defined; the fourth (void *)
1153  * argument to mdi_select_path() depends on the flags used.
1154  *
1155  *   <none>:                            default, arg is pip
1156  *   MDI_SELECT_ONLINE_PATH:            select an ONLINE path preferred-first,
1157  *                                      arg is pip
1158  *   MDI_SELECT_STANDBY_PATH:           select a STANDBY path, arg is pip
1159  *   MDI_SELECT_USER_DISABLE_PATH:      select user disable for failover and
1160  *                                      auto_failback
1161  *   MDI_SELECT_PATH_INSTANCE:          select a specific path, arg is
1162  *                                      path instance
1163  *   MDI_SELECT_NO_PREFERRED:           select path without preferred-first
1164  *
1165  * The selected paths are returned in an mdi_hold_path() state (pi_ref_cnt),
1166  * caller should release the hold by calling mdi_rele_path() at the end of
1167  * operation.
1168  */
1169 int             mdi_select_path(dev_info_t *, struct buf *, int,
1170                     void *, mdi_pathinfo_t **);
1171 int             mdi_set_lb_policy(dev_info_t *, client_lb_t);
1172 int             mdi_set_lb_region_size(dev_info_t *, int);
1173 client_lb_t     mdi_get_lb_policy(dev_info_t *);
1174 
1175 /*
1176  * flags for mdi_select_path() routine (distinct bits, so they can be OR-ed)
1177  */
1178 #define MDI_SELECT_ONLINE_PATH          0x0001
1179 #define MDI_SELECT_STANDBY_PATH         0x0002
1180 #define MDI_SELECT_USER_DISABLE_PATH    0x0004
1181 #define MDI_SELECT_PATH_INSTANCE        0x0008
1182 #define MDI_SELECT_NO_PREFERRED         0x0010
1183 
1184 /*
1185  * MDI client device utility functions; named in parallel with
      * mdi_phci_get_path_count() and mdi_phci_path2devinfo().
1186  */
1187 int             mdi_client_get_path_count(dev_info_t *);
1188 dev_info_t      *mdi_client_path2devinfo(dev_info_t *, caddr_t);
1189 
1190 /*
1191  * Failover:
1192  *
1193  * The vHCI driver calls mdi_failover() to initiate a failover operation.
1194  * mdi_failover() calls back into the vHCI driver's vo_failover()
1195  * entry point to perform the actual failover operation.  The reason
1196  * for requiring the vHCI driver to initiate failover by calling
1197  * mdi_failover(), instead of directly executing vo_failover() itself,
1198  * is to ensure that the mdi framework can keep track of the client
1199  * state properly.  Additionally, mdi_failover() provides as a
1200  * convenience the option of performing the failover operation
1201  * synchronously or asynchronously.
1202  *
1203  * Upon successful completion of the failover operation, the paths that were
1204  * previously ONLINE will be in the STANDBY state, and the newly activated
1205  * paths will be in the ONLINE state.
1206  *
1207  * The flags modifier determines whether the activation is done synchronously
      * or asynchronously; see the MDI_FAILOVER_* modes.
1208  */
1209 int mdi_failover(dev_info_t *, dev_info_t *, int);
1210 
1211 /*
1212  * Client device failover mode of operation (flags for mdi_failover())
1213  */
1214 #define MDI_FAILOVER_SYNC       1       /* Synchronous Failover         */
1215 #define MDI_FAILOVER_ASYNC      2       /* Asynchronous Failover        */
1216 
1217 /*
1218  * mdi_is_dev_supported: The pHCI driver bus_config implementation calls
1219  * mdi_is_dev_supported to determine if a child device is supported as
1220  * a vHCI child (i.e. as a client). The method used to specify the child
1221  * device, via the cinfo argument, is by agreement between the pHCI and the
1222  * vHCI.  In the case of SCSA and scsi_vhci, cinfo is a pointer to the pHCI
1223  * probe dev_info node, which is decorated with the device identity information
1224  * necessary to determine scsi_vhci support.
1225  */
1226 int mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo);
1227 
1228 /*
1229  * mdi_pathinfo node kstat functions: existence test, creation, I/O update.
1230  */
1231 int mdi_pi_kstat_exists(mdi_pathinfo_t *);
1232 int mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ks_name);
1233 void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *);
1234 
1235 /*
1236  * mdi_pathinfo node extended state functions (getters only in this group).
1237  */
1238 int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *);
1239 int mdi_pi_get_preferred(mdi_pathinfo_t *);
1240 
1241 /*
1242  * mdi_pathinfo node member accessors (client private data, state, preferred)
1243  */
1244 void *mdi_pi_get_client_private(mdi_pathinfo_t *);
1245 void mdi_pi_set_client_private(mdi_pathinfo_t *, void *);
1246 void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t);
1247 void mdi_pi_set_preferred(mdi_pathinfo_t *, int);
1248 
1249 /* get/set vhci private data, plus mdi_dc_return_dev_state (devctl support) */
1250 void *mdi_client_get_vhci_private(dev_info_t *);
1251 void mdi_client_set_vhci_private(dev_info_t *, void *);
1252 void *mdi_phci_get_vhci_private(dev_info_t *);
1253 void mdi_phci_set_vhci_private(dev_info_t *, void *);
1254 void *mdi_pi_get_vhci_private(mdi_pathinfo_t *);
1255 void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *);
1256 int mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp);
1257 
1258 /*
1259  * mdi_pathinfo property utilities (size and pack)
1260  */
1261 int mdi_prop_size(mdi_pathinfo_t *, size_t *);
1262 int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t);
1263 
1264 /* obsolete interfaces, to be removed; do not use in new code */
1265 void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **);
1266 int mdi_get_component_type(dev_info_t *);
1267 
1268 #endif  /* _KERNEL */
1269 
1270 #ifdef  __cplusplus
1271 }
1272 #endif
1273 
1274 #endif  /* _SYS_MDI_IMPLDEFS_H */