1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2018 Nexenta Systems, Inc.
25 */
26
27 /*
28 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
29 * more detailed discussion of the overall mpxio architecture.
30 */
31
32 #include <sys/note.h>
33 #include <sys/types.h>
34 #include <sys/varargs.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
37 #include <sys/uio.h>
38 #include <sys/buf.h>
39 #include <sys/modctl.h>
40 #include <sys/open.h>
41 #include <sys/kmem.h>
42 #include <sys/poll.h>
43 #include <sys/conf.h>
44 #include <sys/bootconf.h>
45 #include <sys/cmn_err.h>
46 #include <sys/stat.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/ddipropdefs.h>
50 #include <sys/sunndi.h>
51 #include <sys/ndi_impldefs.h>
52 #include <sys/promif.h>
53 #include <sys/sunmdi.h>
54 #include <sys/mdi_impldefs.h>
55 #include <sys/taskq.h>
56 #include <sys/epm.h>
57 #include <sys/sunpm.h>
58 #include <sys/modhash.h>
59 #include <sys/disp.h>
60 #include <sys/autoconf.h>
61 #include <sys/sysmacros.h>
62
63 #ifdef DEBUG
64 #include <sys/debug.h>
65 int mdi_debug = 1;
66 int mdi_debug_logonly = 0;
67 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs
68 #define MDI_WARN CE_WARN, __func__
69 #define MDI_NOTE CE_NOTE, __func__
70 #define MDI_CONT CE_CONT, __func__
71 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
72 #else /* !DEBUG */
73 #define MDI_DEBUG(dbglevel, pargs)
74 #endif /* DEBUG */
75 int mdi_debug_consoleonly = 0;
76 int mdi_delay = 3;
77
78 extern pri_t minclsyspri;
79 extern int modrootloaded;
80
81 /*
82 * Global mutex:
83 * Protects vHCI list and structure members.
84 */
85 kmutex_t mdi_mutex;
86
87 /*
88 * Registered vHCI class driver lists
89 */
90 int mdi_vhci_count;
91 mdi_vhci_t *mdi_vhci_head;
92 mdi_vhci_t *mdi_vhci_tail;
93
94 /*
95 * Client Hash Table size
96 */
97 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
98
99 /*
100 * taskq interface definitions
101 */
102 #define MDI_TASKQ_N_THREADS 8
103 #define MDI_TASKQ_PRI minclsyspri
104 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads)
105 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads)
106
107 taskq_t *mdi_taskq;
108 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
109
110 #define TICKS_PER_SECOND (drv_usectohz(1000000))
111
112 /*
113 * The data should be "quiet" for this interval (in seconds) before the
114 * vhci cached data is flushed to the disk.
115 */
116 static int mdi_vhcache_flush_delay = 10;
117
118 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
119 static int mdi_vhcache_flush_daemon_idle_time = 60;
120
121 /*
122 * MDI falls back to discovery of all paths when a bus_config_one fails.
123 * The following parameters can be used to tune this operation.
124 *
125 * mdi_path_discovery_boot
126 * Number of times path discovery will be attempted during early boot.
127 * Probably there is no reason to ever set this value to greater than one.
128 *
129 * mdi_path_discovery_postboot
130 * Number of times path discovery will be attempted after early boot.
131 * Set it to a minimum of two to allow for discovery of iscsi paths which
132 * may happen very late during booting.
133 *
134 * mdi_path_discovery_interval
135 * Minimum number of seconds MDI will wait between successive discovery
136 * of all paths. Set it to -1 to disable discovery of all paths.
137 */
138 static int mdi_path_discovery_boot = 1;
139 static int mdi_path_discovery_postboot = 2;
140 static int mdi_path_discovery_interval = 10;
141
142 /*
143 * number of seconds the asynchronous configuration thread will sleep idle
144 * before exiting.
145 */
146 static int mdi_async_config_idle_time = 600;
147
148 static int mdi_bus_config_cache_hash_size = 256;
149
150 /* turns off multithreaded configuration for certain operations */
151 static int mdi_mtc_off = 0;
152
153 /*
154 * The "path" to a pathinfo node is identical to the /devices path to a
155 * devinfo node had the device been enumerated under a pHCI instead of
156 * a vHCI. This pathinfo "path" is associated with a 'path_instance'.
157 * This association persists across create/delete of the pathinfo nodes,
158 * but not across reboot.
159 */
160 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */
161 static int mdi_pathmap_hash_size = 256;
162 static kmutex_t mdi_pathmap_mutex;
163 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */
164 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */
165 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */
166
167 /*
168 * MDI component property name/value string definitions
169 */
170 const char *mdi_component_prop = "mpxio-component";
171 const char *mdi_component_prop_vhci = "vhci";
172 const char *mdi_component_prop_phci = "phci";
173 const char *mdi_component_prop_client = "client";
174
175 /*
176 * MDI client global unique identifier property name
177 */
178 const char *mdi_client_guid_prop = "client-guid";
179
180 /*
181 * MDI client load balancing property name/value string definitions
182 */
183 const char *mdi_load_balance = "load-balance";
184 const char *mdi_load_balance_none = "none";
185 const char *mdi_load_balance_rr = "round-robin";
186 const char *mdi_load_balance_lba = "logical-block";
187
188 /*
189 * Obsolete vHCI class definition; to be removed after Leadville update
190 */
191 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
192
193 static char vhci_greeting[] =
194 "\tThere already exists one vHCI driver for class %s\n"
195 "\tOnly one vHCI driver for each class is allowed\n";
196
197 /*
198 * Static function prototypes
199 */
200 static int i_mdi_phci_offline(dev_info_t *, uint_t);
201 static int i_mdi_client_offline(dev_info_t *, uint_t);
202 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
203 static void i_mdi_phci_post_detach(dev_info_t *,
204 ddi_detach_cmd_t, int);
205 static int i_mdi_client_pre_detach(dev_info_t *,
206 ddi_detach_cmd_t);
207 static void i_mdi_client_post_detach(dev_info_t *,
208 ddi_detach_cmd_t, int);
209 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *);
210 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *);
211 static int i_mdi_lba_lb(mdi_client_t *ct,
212 mdi_pathinfo_t **ret_pip, struct buf *buf);
213 static void i_mdi_pm_hold_client(mdi_client_t *, int);
214 static void i_mdi_pm_rele_client(mdi_client_t *, int);
215 static void i_mdi_pm_reset_client(mdi_client_t *);
216 static int i_mdi_power_all_phci(mdi_client_t *);
217 static void i_mdi_log_sysevent(dev_info_t *, char *, char *);
218
219
220 /*
221 * Internal mdi_pathinfo node functions
222 */
223 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
224
225 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *);
226 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *);
227 static mdi_phci_t *i_devi_get_phci(dev_info_t *);
228 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
229 static void i_mdi_phci_unlock(mdi_phci_t *);
230 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
231 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
232 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
233 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
234 mdi_client_t *);
235 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
236 static void i_mdi_client_remove_path(mdi_client_t *,
237 mdi_pathinfo_t *);
238
239 static int i_mdi_pi_state_change(mdi_pathinfo_t *,
240 mdi_pathinfo_state_t, int);
241 static int i_mdi_pi_offline(mdi_pathinfo_t *, int);
242 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
243 char **, int);
244 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
245 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
246 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *);
247 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
248 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
249 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
250 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *);
251 static void i_mdi_client_update_state(mdi_client_t *);
252 static int i_mdi_client_compute_state(mdi_client_t *,
253 mdi_phci_t *);
254 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
255 static void i_mdi_client_unlock(mdi_client_t *);
256 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
257 static mdi_client_t *i_devi_get_client(dev_info_t *);
258 /*
259 * NOTE: this will be removed once the NWS files are changed to use the new
260 * mdi_{enable,disable}_path interfaces
261 */
262 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
263 int, int);
264 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
265 mdi_vhci_t *vh, int flags, int op);
266 /*
267 * Failover related function prototypes
268 */
269 static int i_mdi_failover(void *);
270
271 /*
272 * misc internal functions
273 */
274 static int i_mdi_get_hash_key(char *);
275 static int i_map_nvlist_error_to_mdi(int);
276 static void i_mdi_report_path_state(mdi_client_t *,
277 mdi_pathinfo_t *);
278
279 static void setup_vhci_cache(mdi_vhci_t *);
280 static int destroy_vhci_cache(mdi_vhci_t *);
281 static int stop_vhcache_async_threads(mdi_vhci_config_t *);
282 static boolean_t stop_vhcache_flush_thread(void *, int);
283 static void free_string_array(char **, int);
284 static void free_vhcache_phci(mdi_vhcache_phci_t *);
285 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
286 static void free_vhcache_client(mdi_vhcache_client_t *);
287 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
288 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *);
289 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
290 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
291 static void vhcache_pi_add(mdi_vhci_config_t *,
292 struct mdi_pathinfo *);
293 static void vhcache_pi_remove(mdi_vhci_config_t *,
294 struct mdi_pathinfo *);
295 static void free_phclient_path_list(mdi_phys_path_t *);
296 static void sort_vhcache_paths(mdi_vhcache_client_t *);
297 static int flush_vhcache(mdi_vhci_config_t *, int);
298 static void vhcache_dirty(mdi_vhci_config_t *);
299 static void free_async_client_config(mdi_async_client_config_t *);
300 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *);
301 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *);
302 static nvlist_t *read_on_disk_vhci_cache(char *);
303 extern int fread_nvlist(char *, nvlist_t **);
304 extern int fwrite_nvlist(char *, nvlist_t *);
305
/*
 * i_mdi_init():
 *	One-time framework initialization; called when the first vHCI
 *	registers with mdi (see mdi_vhci_register()).  Sets up the global
 *	mdi_mutex, the shared taskq, and the path_instance <-> "path" maps.
 *
 *	NOTE(review): the 'initialized' latch is not itself lock-protected;
 *	this presumably relies on the first vHCI registration being
 *	effectively single-threaded -- confirm before invoking from any
 *	new context.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	/* Latch: only the first caller performs initialization. */
	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}
336
337 /*
338 * mdi_get_component_type():
339 * Return mpxio component type
340 * Return Values:
341 * MDI_COMPONENT_NONE
342 * MDI_COMPONENT_VHCI
343 * MDI_COMPONENT_PHCI
344 * MDI_COMPONENT_CLIENT
345 * XXX This doesn't work under multi-level MPxIO and should be
346 * removed when clients migrate mdi_component_is_*() interfaces.
347 */
348 int
349 mdi_get_component_type(dev_info_t *dip)
350 {
351 return (DEVI(dip)->devi_mdi_component);
352 }
353
354 /*
355 * mdi_vhci_register():
356 * Register a vHCI module with the mpxio framework
357 * mdi_vhci_register() is called by vHCI drivers to register the
358 * 'class_driver' vHCI driver and its MDI entrypoints with the
359 * mpxio framework. The vHCI driver must call this interface as
360 * part of its attach(9e) handler.
361 * Competing threads may try to attach mdi_vhci_register() as
362 * the vHCI drivers are loaded and attached as a result of pHCI
363 * driver instance registration (mdi_phci_register()) with the
364 * framework.
365 * Return Values:
366 * MDI_SUCCESS
367 * MDI_FAILURE
368 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	/* Registrant can't be older */
	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);

#ifdef DEBUG
	/*
	 * IB nexus driver is loaded only when IB hardware is present.
	 * In order to be able to do this there is a need to drive the loading
	 * and attaching of the IB nexus driver (especially when an IB hardware
	 * is dynamically plugged in) when an IB HCA driver (PHCI)
	 * is being attached. Unfortunately this gets into the limitations
	 * of devfs as there seems to be no clean way to drive configuration
	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
	 * for IB.
	 */
	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
#endif

	/* One-time framework setup (global mutex, taskq, pathmaps). */
	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created. Check for valid
			 * vHCI ops registration. We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/*
		 * Default to round-robin load balancing; the vHCI's
		 * LOAD_BALANCE_PROP property may select "none" or
		 * logical-block instead.
		 */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* Append the new vHCI to the global list (under mdi_mutex). */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}
477
478 /*
479 * mdi_vhci_unregister():
480 * Unregister a vHCI module from mpxio framework
481 * mdi_vhci_unregister() is called from the detach(9E) entrypoint
482 * of a vhci to unregister it from the framework.
483 * Return Values:
484 * MDI_SUCCESS
485 * MDI_FAILURE
486 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;	/* remember predecessor for list unlink */
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Tear down the vhci cache first; refuse unregister if it fails. */
	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/*
	 * Clear the devinfo back-references, then free the extension
	 * and everything it owns (class string, client hash table, locks).
	 */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}
560
561 /*
562 * i_mdi_vhci_class2vhci():
563 * Look for a matching vHCI module given a vHCI class name
564 * Return Values:
565 * Handle to a vHCI component
566 * NULL
567 */
568 static mdi_vhci_t *
569 i_mdi_vhci_class2vhci(char *class)
570 {
571 mdi_vhci_t *vh = NULL;
572
573 ASSERT(!MUTEX_HELD(&mdi_mutex));
574
575 mutex_enter(&mdi_mutex);
576 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
577 if (strcmp(vh->vh_class, class) == 0) {
578 break;
579 }
580 }
581 mutex_exit(&mdi_mutex);
582 return (vh);
583 }
584
585 /*
586 * i_devi_get_vhci():
587 * Utility function to get the handle to a vHCI component
588 * Return Values:
589 * Handle to a vHCI component
590 * NULL
591 */
592 mdi_vhci_t *
593 i_devi_get_vhci(dev_info_t *vdip)
594 {
595 mdi_vhci_t *vh = NULL;
596 if (MDI_VHCI(vdip)) {
597 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
598 }
599 return (vh);
600 }
601
602 /*
603 * mdi_phci_register():
604 * Register a pHCI module with mpxio framework
605 * mdi_phci_register() is called by pHCI drivers to register with
606 * the mpxio framework and a specific 'class_driver' vHCI. The
607 * pHCI driver must call this interface as part of its attach(9e)
608 * handler.
609 * Return Values:
610 * MDI_SUCCESS
611 * MDI_FAILURE
612 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (MDI_CONT, pdip,
			    "?multipath capabilities disabled via %s.conf.",
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		/* No vHCI of this class registered yet. */
		return (MDI_FAILURE);
	}

	/* Allocate and initialize the pHCI extension. */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	/* Record the initial power-up state under the pHCI lock. */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);
	/* Claim the devinfo node as a pHCI and set the back reference. */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* Append the new pHCI to the vHCI's pHCI list. */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Announce the initiator registration via sysevent. */
	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}
687
688 /*
689 * mdi_phci_unregister():
690 * Unregister a pHCI module from mpxio framework
691 * mdi_phci_unregister() is called by the pHCI drivers from their
692 * detach(9E) handler to unregister their instances from the
693 * framework.
694 * Return Values:
695 * MDI_SUCCESS
696 * MDI_FAILURE
697 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;
	mdi_pathinfo_t		*pip;

	/* See mdi_phci_register() for why this is not DEVI_BUSY_OWNED. */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Defensive check for non-DEBUG kernels; should not happen. */
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/* Unlink this pHCI from the vHCI's pHCI list. */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
	MDI_PHCI_LOCK(ph);
	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
		MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);

	/*
	 * Announce the unregistration and drop the cache entry before
	 * freeing the extension and clearing the devinfo back-references.
	 */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}
763
764 /*
765 * i_devi_get_phci():
766 * Utility function to return the phci extensions.
767 */
768 static mdi_phci_t *
769 i_devi_get_phci(dev_info_t *pdip)
770 {
771 mdi_phci_t *ph = NULL;
772
773 if (MDI_PHCI(pdip)) {
774 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
775 }
776 return (ph);
777 }
778
779 /*
780 * Single thread mdi entry into devinfo node for modifying its children.
781 * If necessary we perform an ndi_devi_enter of the vHCI before doing
782 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one
783 * for the vHCI and one for the pHCI.
784 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a threads that went down the code path leading to
	 * detach_node(). This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and the enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this calls
	 * enter of the vHCI on frameworks vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/*
				 * Raced with a pHCI detach: back out our
				 * vHCI enter and piggyback on the
				 * framework's enter (flagged by -1).
				 */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			/* Piggyback on the framework's vHCI enter. */
			vcircular = -1;
			break;
		} else if (servicing_interrupt()) {
			/*
			 * Don't delay an interrupt (and ensure adaptive
			 * mutex inversion support).
			 */
			ndi_devi_enter(vdip, &vcircular);
			break;
		} else {
			delay_random(mdi_delay);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);
	/* Pack both recursion counts into the single 'circular' out-arg. */
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}
837
838 /*
839 * Attempt to mdi_devi_enter.
840 */
841 int
842 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
843 {
844 dev_info_t *vdip;
845 int vcircular, pcircular;
846
847 /* Verify calling context */
848 ASSERT(MDI_PHCI(phci_dip));
849 vdip = mdi_devi_get_vdip(phci_dip);
850 ASSERT(vdip); /* A pHCI always has a vHCI */
851
852 if (ndi_devi_tryenter(vdip, &vcircular)) {
853 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
854 *circular = (vcircular << 16) | (pcircular & 0xFFFF);
855 return (1); /* locked */
856 }
857 ndi_devi_exit(vdip, vcircular);
858 }
859 return (0); /* busy */
860 }
861
862 /*
863 * Release mdi_devi_enter or successful mdi_devi_tryenter.
864 */
865 void
866 mdi_devi_exit(dev_info_t *phci_dip, int circular)
867 {
868 dev_info_t *vdip;
869 int vcircular, pcircular;
870
871 /* Verify calling context */
872 ASSERT(MDI_PHCI(phci_dip));
873 vdip = mdi_devi_get_vdip(phci_dip);
874 ASSERT(vdip); /* A pHCI always has a vHCI */
875
876 /* extract two circular recursion values from single int */
877 pcircular = (short)(circular & 0xFFFF);
878 vcircular = (short)((circular >> 16) & 0xFFFF);
879
880 ndi_devi_exit(phci_dip, pcircular);
881 if (vcircular != -1)
882 ndi_devi_exit(vdip, vcircular);
883 }
884
885 /*
886 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
887 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
888 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
889 * with vHCI power management code during path online/offline. Each
890 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
891 * occur within the scope of an active mdi_devi_enter that establishes the
892 * circular value.
893 */
894 void
895 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
896 {
897 int pcircular;
898
899 /* Verify calling context */
900 ASSERT(MDI_PHCI(phci_dip));
901
902 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
903 ndi_hold_devi(phci_dip);
904
905 pcircular = (short)(circular & 0xFFFF);
906 ndi_devi_exit(phci_dip, pcircular);
907 }
908
909 void
910 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
911 {
912 int pcircular;
913
914 /* Verify calling context */
915 ASSERT(MDI_PHCI(phci_dip));
916
917 ndi_devi_enter(phci_dip, &pcircular);
918
919 /* Drop hold from mdi_devi_exit_phci. */
920 ndi_rele_devi(phci_dip);
921
922 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
923 ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
924 }
925
926 /*
927 * mdi_devi_get_vdip():
928 * given a pHCI dip return vHCI dip
929 */
930 dev_info_t *
931 mdi_devi_get_vdip(dev_info_t *pdip)
932 {
933 mdi_phci_t *ph;
934
935 ph = i_devi_get_phci(pdip);
936 if (ph && ph->ph_vhci)
937 return (ph->ph_vhci->vh_dip);
938 return (NULL);
939 }
940
941 /*
942 * mdi_devi_pdip_entered():
943 * Return 1 if we are vHCI and have done an ndi_devi_enter
944 * of a pHCI
945 */
946 int
947 mdi_devi_pdip_entered(dev_info_t *vdip)
948 {
949 mdi_vhci_t *vh;
950 mdi_phci_t *ph;
951
952 vh = i_devi_get_vhci(vdip);
953 if (vh == NULL)
954 return (0);
955
956 MDI_VHCI_PHCI_LOCK(vh);
957 ph = vh->vh_phci_head;
958 while (ph) {
959 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
960 MDI_VHCI_PHCI_UNLOCK(vh);
961 return (1);
962 }
963 ph = ph->ph_next;
964 }
965 MDI_VHCI_PHCI_UNLOCK(vh);
966 return (0);
967 }
968
969 /*
970 * mdi_phci_path2devinfo():
971 * Utility function to search for a valid phci device given
972 * the devfs pathname.
973 */
974 dev_info_t *
975 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
976 {
977 char *temp_pathname;
978 mdi_vhci_t *vh;
979 mdi_phci_t *ph;
980 dev_info_t *pdip = NULL;
981
982 vh = i_devi_get_vhci(vdip);
983 ASSERT(vh != NULL);
984
985 if (vh == NULL) {
986 /*
987 * Invalid vHCI component, return failure
988 */
989 return (NULL);
990 }
991
992 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
993 MDI_VHCI_PHCI_LOCK(vh);
994 ph = vh->vh_phci_head;
995 while (ph != NULL) {
996 pdip = ph->ph_dip;
997 ASSERT(pdip != NULL);
998 *temp_pathname = '\0';
999 (void) ddi_pathname(pdip, temp_pathname);
1000 if (strcmp(temp_pathname, pathname) == 0) {
1001 break;
1002 }
1003 ph = ph->ph_next;
1004 }
1005 if (ph == NULL) {
1006 pdip = NULL;
1007 }
1008 MDI_VHCI_PHCI_UNLOCK(vh);
1009 kmem_free(temp_pathname, MAXPATHLEN);
1010 return (pdip);
1011 }
1012
1013 /*
1014 * mdi_phci_get_path_count():
1015 * get number of path information nodes associated with a given
1016 * pHCI device.
1017 */
1018 int
1019 mdi_phci_get_path_count(dev_info_t *pdip)
1020 {
1021 mdi_phci_t *ph;
1022 int count = 0;
1023
1024 ph = i_devi_get_phci(pdip);
1025 if (ph != NULL) {
1026 count = ph->ph_path_count;
1027 }
1028 return (count);
1029 }
1030
1031 /*
1032 * i_mdi_phci_lock():
1033 * Lock a pHCI device
1034 * Return Values:
1035 * None
1036 * Note:
1037 * The default locking order is:
1038 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1039 * But there are number of situations where locks need to be
1040 * grabbed in reverse order. This routine implements try and lock
1041 * mechanism depending on the requested parameter option.
1042 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Interrupt context: cannot delay.  Hold
				 * the pathinfo so it stays valid, drop its
				 * lock, take the pHCI lock in the default
				 * order, then re-take the pathinfo lock
				 * and drop the hold.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* No pathinfo lock held: default lock order, just block. */
		MDI_PHCI_LOCK(ph);
	}
}
1072
1073 /*
1074 * i_mdi_phci_unlock():
1075 * Unlock the pHCI component
1076 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	/* Release the pHCI mutex acquired via i_mdi_phci_lock() */
	MDI_PHCI_UNLOCK(ph);
}
1082
1083 /*
1084 * i_mdi_devinfo_create():
1085 * create client device's devinfo node
1086 * Return Values:
1087 * dev_info
1088 * NULL
1089 * Notes:
1090 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Verify for duplicate entry.  A duplicate trips the ASSERT on
	 * DEBUG kernels; on non-DEBUG kernels the code only warns and
	 * continues, allocating a second node for the same name@guid.
	 * NOTE(review): confirm whether continuing is intentional here.
	 */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client %s@%s already exists",
		    name ? name : "", guid ? guid : "");
	}

	/* Allocate the client devinfo node as a child of the vHCI */
	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* Undo any partial decoration before freeing the node */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}
1137
1138 /*
1139 * i_mdi_devinfo_find():
1140 * Find a matching devinfo node for given client node name
1141 * and its guid.
1142 * Return Values:
1143 * Handle to a dev_info node or NULL
1144 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char		*data;
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	int		circular;

	/* Freeze the vHCI's child list while we walk it */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		/* Fetch the sibling up front; we may break on cdip */
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		/* Node name must match the requested client name */
		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		/* Candidate must carry a GUID property of its own */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		/* Both name and GUID match: found our client node */
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}
1178
1179 /*
1180 * i_mdi_devinfo_remove():
1181 * Remove a client device node
1182 */
1183 static int
1184 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1185 {
1186 int rv = MDI_SUCCESS;
1187
1188 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1189 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1190 int nflags = NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE;
1191
1192 if (flags & MDI_CLIENT_FLAGS_NO_EVENT)
1193 nflags |= NDI_NO_EVENT;
1194
1195 rv = ndi_devi_offline(cdip, nflags);
1196 if (rv != NDI_SUCCESS) {
1197 MDI_DEBUG(1, (MDI_NOTE, cdip,
1198 "!failed: cdip %p", (void *)cdip));
1199 }
1200 /*
1201 * Convert to MDI error code
1202 */
1203 switch (rv) {
1204 case NDI_SUCCESS:
1205 rv = MDI_SUCCESS;
1206 break;
1207 case NDI_BUSY:
1208 rv = MDI_BUSY;
1209 break;
1210 default:
1211 rv = MDI_FAILURE;
1212 break;
1213 }
1214 }
1215 return (rv);
1216 }
1217
1218 /*
1219 * i_devi_get_client()
1220 * Utility function to get mpxio component extensions
1221 */
1222 static mdi_client_t *
1223 i_devi_get_client(dev_info_t *cdip)
1224 {
1225 mdi_client_t *ct = NULL;
1226
1227 if (MDI_CLIENT(cdip)) {
1228 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1229 }
1230 return (ct);
1231 }
1232
1233 /*
1234 * i_mdi_is_child_present():
1235 * Search for the presence of client device dev_info node
1236 */
1237 static int
1238 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1239 {
1240 int rv = MDI_FAILURE;
1241 struct dev_info *dip;
1242 int circular;
1243
1244 ndi_devi_enter(vdip, &circular);
1245 dip = DEVI(vdip)->devi_child;
1246 while (dip) {
1247 if (dip == DEVI(cdip)) {
1248 rv = MDI_SUCCESS;
1249 break;
1250 }
1251 dip = dip->devi_sibling;
1252 }
1253 ndi_devi_exit(vdip, circular);
1254 return (rv);
1255 }
1256
1257
1258 /*
1259 * i_mdi_client_lock():
1260 * Grab client component lock
1261 * Return Values:
1262 * None
1263 * Note:
1264 * The default locking order is:
1265 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1266 * But there are number of situations where locks need to be
1267 * grabbed in reverse order. This routine implements try and lock
1268 * mechanism depending on the requested parameter option.
1269 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested: the caller already holds
		 * pip's mutex, so blocking on ct's mutex would invert the
		 * documented client -> pathinfo lock order.  Try-lock and
		 * back off on failure instead.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Cannot delay in interrupt context.  Take
				 * a hold on pip so it stays valid, drop its
				 * lock, then acquire both locks in the
				 * default (deadlock-free) order.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_CLIENT_LOCK(ct);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* No pathinfo lock held; safe to block on the client lock. */
		MDI_CLIENT_LOCK(ct);
	}
}
1301
1302 /*
1303 * i_mdi_client_unlock():
1304 * Unlock a client component
1305 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	/* Release the client mutex acquired via i_mdi_client_lock() */
	MDI_CLIENT_UNLOCK(ct);
}
1311
1312 /*
1313 * i_mdi_client_alloc():
1314 * Allocate and initialize a client structure. Caller should
1315 * hold the vhci client lock.
1316 * Return Values:
1317 * Handle to a client component
1318 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Allocate and initialize a component structure.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	/* Keep private copies of the driver name and GUID strings */
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/* A new client starts FAILED/offline until paths come online */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	MDI_CLIENT_UNLOCK(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* Inherit the vHCI's load-balance policy; default region size */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}
1369
1370 /*
1371 * i_mdi_client_enlist_table():
1372 * Attach the client device to the client hash table. Caller
1373 * should hold the vhci client lock.
1374 */
1375 static void
1376 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1377 {
1378 int index;
1379 struct client_hash *head;
1380
1381 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1382
1383 index = i_mdi_get_hash_key(ct->ct_guid);
1384 head = &vh->vh_client_table[index];
1385 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1386 head->ct_hash_head = ct;
1387 head->ct_hash_count++;
1388 vh->vh_client_count++;
1389 }
1390
/*
 * i_mdi_client_delist_table():
 *	Detach the client device from the client hash table.
 *	Caller should hold the vhci client lock.
 */
1396 static void
1397 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1398 {
1399 int index;
1400 char *guid;
1401 struct client_hash *head;
1402 mdi_client_t *next;
1403 mdi_client_t *last;
1404
1405 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1406
1407 guid = ct->ct_guid;
1408 index = i_mdi_get_hash_key(guid);
1409 head = &vh->vh_client_table[index];
1410
1411 last = NULL;
1412 next = (mdi_client_t *)head->ct_hash_head;
1413 while (next != NULL) {
1414 if (next == ct) {
1415 break;
1416 }
1417 last = next;
1418 next = next->ct_hnext;
1419 }
1420
1421 if (next) {
1422 head->ct_hash_count--;
1423 if (last == NULL) {
1424 head->ct_hash_head = ct->ct_hnext;
1425 } else {
1426 last->ct_hnext = ct->ct_hnext;
1427 }
1428 ct->ct_hnext = NULL;
1429 vh->vh_client_count--;
1430 }
1431 }
1432
1433
1434 /*
1435 * i_mdi_client_free():
1436 * Free a client component
1437 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	/* Strip the MDI client decorations off the devinfo node */
	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref. to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	/*
	 * The vhci client lock is dropped across the devinfo removal.
	 * NOTE(review): presumably because ndi_devi_offline() can block
	 * and re-enter the MDI framework -- confirm before changing.
	 */
	if (cdip != NULL) {
		MDI_VHCI_CLIENT_UNLOCK(vh);
		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
		MDI_VHCI_CLIENT_LOCK(vh);
	}
	return (rv);
}
1484
1485 /*
1486 * i_mdi_client_find():
1487 * Find the client structure corresponding to a given guid
1488 * Caller should hold the vhci client lock.
1489 */
1490 static mdi_client_t *
1491 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1492 {
1493 int index;
1494 struct client_hash *head;
1495 mdi_client_t *ct;
1496
1497 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1498
1499 index = i_mdi_get_hash_key(guid);
1500 head = &vh->vh_client_table[index];
1501
1502 ct = head->ct_hash_head;
1503 while (ct != NULL) {
1504 if (strcmp(ct->ct_guid, guid) == 0 &&
1505 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1506 break;
1507 }
1508 ct = ct->ct_hnext;
1509 }
1510 return (ct);
1511 }
1512
1513 /*
1514 * i_mdi_client_update_state():
1515 * Compute and update client device state
1516 * Notes:
1517 * A client device can be in any of three possible states:
1518 *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		than one online/standby path. Can tolerate failures.
1521 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1522 * no alternate paths available as standby. A failure on the online
1523 * would result in loss of access to device data.
1524 * MDI_CLIENT_STATE_FAILED - Client device in failed state with
1525 * no paths available to access the device.
1526 */
1527 static void
1528 i_mdi_client_update_state(mdi_client_t *ct)
1529 {
1530 int state;
1531
1532 ASSERT(MDI_CLIENT_LOCKED(ct));
1533 state = i_mdi_client_compute_state(ct, NULL);
1534 MDI_CLIENT_SET_STATE(ct, state);
1535 }
1536
1537 /*
1538 * i_mdi_client_compute_state():
1539 * Compute client device state
1540 *
 *	mdi_phci_t *	Pointer to a pHCI structure whose paths should be
 *			skipped while computing the new value. Used by
 *			i_mdi_phci_offline() to find the new
 *			client state after DR of a pHCI.
1545 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int		state;
	int		online_count = 0;
	int		standby_count = 0;
	mdi_pathinfo_t	*pip, *next;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Tally online and standby paths, skipping any on pHCI "ph" */
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* Grab the link before dropping the path lock */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci == ph) {
			/* Path belongs to the excluded pHCI; ignore it */
			MDI_PI_UNLOCK(pip);
			pip = next;
			continue;
		}

		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_ONLINE)
			online_count++;
		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_STANDBY)
			standby_count++;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/*
	 * Map the counts to a client state:
	 *	no online and no standby paths	-> FAILED
	 *	exactly one usable path		-> DEGRADED (no redundancy)
	 *	otherwise			-> OPTIMAL
	 */
	if (online_count == 0) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_FAILED;
			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
			    "client state failed: ct = %p", (void *)ct));
		} else if (standby_count == 1) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else if (online_count == 1) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else {
		state = MDI_CLIENT_STATE_OPTIMAL;
	}
	return (state);
}
1596
1597 /*
1598 * i_mdi_client2devinfo():
1599 * Utility function
1600 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	/* Devinfo node bound to this client; NULL after i_mdi_client_free */
	return (ct->ct_dip);
}
1606
1607 /*
1608 * mdi_client_path2_devinfo():
1609 * Given the parent devinfo and child devfs pathname, search for
1610 * a valid devfs node handle.
1611 */
1612 dev_info_t *
1613 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1614 {
1615 dev_info_t *cdip = NULL;
1616 dev_info_t *ndip = NULL;
1617 char *temp_pathname;
1618 int circular;
1619
1620 /*
1621 * Allocate temp buffer
1622 */
1623 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1624
1625 /*
1626 * Lock parent against changes
1627 */
1628 ndi_devi_enter(vdip, &circular);
1629 ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1630 while ((cdip = ndip) != NULL) {
1631 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1632
1633 *temp_pathname = '\0';
1634 (void) ddi_pathname(cdip, temp_pathname);
1635 if (strcmp(temp_pathname, pathname) == 0) {
1636 break;
1637 }
1638 }
1639 /*
1640 * Release devinfo lock
1641 */
1642 ndi_devi_exit(vdip, circular);
1643
1644 /*
1645 * Free the temp buffer
1646 */
1647 kmem_free(temp_pathname, MAXPATHLEN);
1648 return (cdip);
1649 }
1650
1651 /*
1652 * mdi_client_get_path_count():
1653 * Utility function to get number of path information nodes
1654 * associated with a given client device.
1655 */
1656 int
1657 mdi_client_get_path_count(dev_info_t *cdip)
1658 {
1659 mdi_client_t *ct;
1660 int count = 0;
1661
1662 ct = i_devi_get_client(cdip);
1663 if (ct != NULL) {
1664 count = ct->ct_path_count;
1665 }
1666 return (count);
1667 }
1668
1669
1670 /*
1671 * i_mdi_get_hash_key():
1672 * Create a hash using strings as keys
1673 *
1674 */
1675 static int
1676 i_mdi_get_hash_key(char *str)
1677 {
1678 uint32_t g, hash = 0;
1679 char *p;
1680
1681 for (p = str; *p != '\0'; p++) {
1682 g = *p;
1683 hash += g;
1684 }
1685 return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1686 }
1687
1688 /*
1689 * mdi_get_lb_policy():
1690 * Get current load balancing policy for a given client device
1691 */
1692 client_lb_t
1693 mdi_get_lb_policy(dev_info_t *cdip)
1694 {
1695 client_lb_t lb = LOAD_BALANCE_NONE;
1696 mdi_client_t *ct;
1697
1698 ct = i_devi_get_client(cdip);
1699 if (ct != NULL) {
1700 lb = ct->ct_lb;
1701 }
1702 return (lb);
1703 }
1704
1705 /*
1706 * mdi_set_lb_region_size():
1707 * Set current region size for the load-balance
1708 */
1709 int
1710 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1711 {
1712 mdi_client_t *ct;
1713 int rv = MDI_FAILURE;
1714
1715 ct = i_devi_get_client(cdip);
1716 if (ct != NULL && ct->ct_lb_args != NULL) {
1717 ct->ct_lb_args->region_size = region_size;
1718 rv = MDI_SUCCESS;
1719 }
1720 return (rv);
1721 }
1722
1723 /*
 * mdi_set_lb_policy():
1725 * Set current load balancing policy for a given client device
1726 */
1727 int
1728 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1729 {
1730 mdi_client_t *ct;
1731 int rv = MDI_FAILURE;
1732
1733 ct = i_devi_get_client(cdip);
1734 if (ct != NULL) {
1735 ct->ct_lb = lb;
1736 rv = MDI_SUCCESS;
1737 }
1738 return (rv);
1739 }
1740
1741 /*
1742 * mdi_failover():
1743 * failover function called by the vHCI drivers to initiate
1744 * a failover operation. This is typically due to non-availability
1745 * of online paths to route I/O requests. Failover can be
1746 * triggered through user application also.
1747 *
1748 * The vHCI driver calls mdi_failover() to initiate a failover
1749 * operation. mdi_failover() calls back into the vHCI driver's
1750 * vo_failover() entry point to perform the actual failover
1751 * operation. The reason for requiring the vHCI driver to
1752 * initiate failover by calling mdi_failover(), instead of directly
1753 * executing vo_failover() itself, is to ensure that the mdi
1754 * framework can keep track of the client state properly.
1755 * Additionally, mdi_failover() provides as a convenience the
1756 * option of performing the failover operation synchronously or
1757 * asynchronously
1758 *
1759 * Upon successful completion of the failover operation, the
1760 * paths that were previously ONLINE will be in the STANDBY state,
1761 * and the newly activated paths will be in the ONLINE state.
1762 *
1763 * The flags modifier determines whether the activation is done
1764 * synchronously: MDI_FAILOVER_SYNC
1765 * Return Values:
1766 * MDI_SUCCESS
1767 * MDI_FAILURE
1768 * MDI_BUSY
1769 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int		rv;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			/* Async caller cannot block; report busy */
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Synchronous caller: wait for path-state stability */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again.
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(cdip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.  The caller gets MDI_ACCEPT; the final
		 * status is posted later in ct_failover_status.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode. Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}
1860
1861 /*
1862 * i_mdi_failover():
1863 * internal failover function. Invokes vHCI drivers failover
1864 * callback function and process the failover status
1865 * Return Values:
1866 * None
1867 *
1868 * Note: A client device in failover state can not be detached or freed.
1869 */
static int
i_mdi_failover(void *arg)
{
	int		rv = MDI_SUCCESS;
	mdi_client_t	*ct = (mdi_client_t *)arg;
	mdi_vhci_t	*vh = ct->ct_vhci;

	ASSERT(!MDI_CLIENT_LOCKED(ct));

	if (vh->vh_ops->vo_failover != NULL) {
		/*
		 * Call vHCI drivers callback routine
		 */
		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/*
	 * Save the failover return status
	 */
	ct->ct_failover_status = rv;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);

	/* Wake any callers blocked on the failover completion */
	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
1906
1907 /*
1908 * Load balancing is logical block.
1909 * IOs within the range described by region_size
1910 * would go on the same path. This would improve the
1911 * performance by cache-hit on some of the RAID devices.
1912 * Search only for online paths(At some point we
1913 * may want to balance across target ports).
1914 * If no paths are found then default to round-robin.
1915 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int	path_index = -1;
	int	online_path_count = 0;
	int	online_nonpref_path_count = 0;
	int	region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	int	preferred, path_cnt;

	/* First pass: count online preferred and non-preferred paths */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
			online_path_count++;
		} else if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
			online_nonpref_path_count++;
		}
		next = (mdi_pathinfo_t *)
		    MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}
	if (path_cnt) {
		/*
		 * Second pass: walk to the ((blkno >> region_size) mod
		 * path_cnt)'th node and select it only if it is online
		 * with the chosen preference; otherwise fall through to
		 * failure (path_index goes negative and the loop ends).
		 */
		path_index = (bp->b_blkno >> region_size) % path_cnt;
		pip = ct->ct_path_head;
		while (pip && path_index != -1) {
			MDI_PI_LOCK(pip);
			if (path_index == 0 &&
			    (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    MDI_PI(pip)->pi_preferred == preferred) {
				/* Return held; caller does mdi_rele_path() */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				*ret_pip = pip;
				return (MDI_SUCCESS);
			}
			path_index --;
			next = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		/*
		 * NOTE(review): pip may be NULL here when the list was
		 * exhausted; mdi_pi_spathname(NULL) looks unsafe on DEBUG
		 * kernels -- confirm.
		 */
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "lba %llx: path %s %p",
		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
	}
	return (MDI_FAILURE);
}
1978
1979 /*
1980 * mdi_select_path():
1981 * select a path to access a client device.
1982 *
1983 * mdi_select_path() function is called by the vHCI drivers to
1984 * select a path to route the I/O request to. The caller passes
1985 * the block I/O data transfer structure ("buf") as one of the
1986 * parameters. The mpxio framework uses the buf structure
1987 * contents to maintain per path statistics (total I/O size /
1988 * count pending). If more than one online paths are available to
1989 * select, the framework automatically selects a suitable path
1990 * for routing I/O request. If a failover operation is active for
1991 * this client device the call shall be failed with MDI_BUSY error
1992 * code.
1993 *
1994 * By default this function returns a suitable path in online
1995 * state based on the current load balancing policy. Currently
1996 * we support LOAD_BALANCE_NONE (Previously selected online path
1997 * will continue to be used till the path is usable) and
1998 * LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block). The load balancing
 *		policy can be configured through the vHCI driver's
 *		configuration file (driver.conf).
2002 *
2003 * vHCI drivers may override this default behavior by specifying
 *		appropriate flags. The meaning of the third argument depends
2005 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2006 * then the argument is the "path instance" of the path to select.
2007 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2008 * "start_pip". A non NULL "start_pip" is the starting point to
2009 * walk and find the next appropriate path. The following values
2010 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2011 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2012 * STANDBY path).
2013 *
2014 * The non-standard behavior is used by the scsi_vhci driver,
2015 * whenever it has to use a STANDBY/FAULTED path. Eg. during
2016 * attach of client devices (to avoid an unnecessary failover
2017 * when the STANDBY path comes up first), during failover
2018 * (to activate a STANDBY path as ONLINE).
2019 *
 *		The selected path is returned in a mdi_hold_path() state
2021 * (pi_ref_cnt). Caller should release the hold by calling
2022 * mdi_rele_path().
2023 *
2024 * Return Values:
2025 * MDI_SUCCESS - Completed successfully
2026 * MDI_BUSY - Client device is busy failing over
2027 * MDI_NOPATH - Client device is online, but no valid path are
2028 * available to access this client device
2029 * MDI_FAILURE - Invalid client device or state
2030 * MDI_DEVI_ONLINING
2031 * - Client device (struct dev_info state) is in
2032 * onlining state.
2033 */
2034
2035 /*ARGSUSED*/
2036 int
2037 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2038 void *arg, mdi_pathinfo_t **ret_pip)
2039 {
2040 mdi_client_t *ct;
2041 mdi_pathinfo_t *pip;
2042 mdi_pathinfo_t *next;
2043 mdi_pathinfo_t *head;
2044 mdi_pathinfo_t *start;
2045 client_lb_t lbp; /* load balancing policy */
2046 int sb = 1; /* standard behavior */
2047 int preferred = 1; /* preferred path */
2048 int cond, cont = 1;
2049 int retry = 0;
2050 mdi_pathinfo_t *start_pip; /* request starting pathinfo */
2051 int path_instance; /* request specific path instance */
2052
2053 /* determine type of arg based on flags */
2054 if (flags & MDI_SELECT_PATH_INSTANCE) {
2055 path_instance = (int)(intptr_t)arg;
2056 start_pip = NULL;
2057 } else {
2058 path_instance = 0;
2059 start_pip = (mdi_pathinfo_t *)arg;
2060 }
2061
2062 if (flags != 0) {
2063 /*
2064 * disable default behavior
2065 */
2066 sb = 0;
2067 }
2068
2069 *ret_pip = NULL;
2070 ct = i_devi_get_client(cdip);
2071 if (ct == NULL) {
2072 /* mdi extensions are NULL, Nothing more to do */
2073 return (MDI_FAILURE);
2074 }
2075
2076 MDI_CLIENT_LOCK(ct);
2077
2078 if (sb) {
2079 if (MDI_CLIENT_IS_FAILED(ct)) {
2080 /*
2081 * Client is not ready to accept any I/O requests.
2082 * Fail this request.
2083 */
2084 MDI_DEBUG(2, (MDI_NOTE, cdip,
2085 "client state offline ct = %p", (void *)ct));
2086 MDI_CLIENT_UNLOCK(ct);
2087 return (MDI_FAILURE);
2088 }
2089
2090 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2091 /*
2092 * Check for Failover is in progress. If so tell the
2093 * caller that this device is busy.
2094 */
2095 MDI_DEBUG(2, (MDI_NOTE, cdip,
2096 "client failover in progress ct = %p",
2097 (void *)ct));
2098 MDI_CLIENT_UNLOCK(ct);
2099 return (MDI_BUSY);
2100 }
2101
2102 /*
2103 * Check to see whether the client device is attached.
2104 * If not so, let the vHCI driver manually select a path
2105 * (standby) and let the probe/attach process to continue.
2106 */
2107 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2108 MDI_DEBUG(4, (MDI_NOTE, cdip,
2109 "devi is onlining ct = %p", (void *)ct));
2110 MDI_CLIENT_UNLOCK(ct);
2111 return (MDI_DEVI_ONLINING);
2112 }
2113 }
2114
2115 /*
2116 * Cache in the client list head. If head of the list is NULL
2117 * return MDI_NOPATH
2118 */
2119 head = ct->ct_path_head;
2120 if (head == NULL) {
2121 MDI_CLIENT_UNLOCK(ct);
2122 return (MDI_NOPATH);
2123 }
2124
2125 /* Caller is specifying a specific pathinfo path by path_instance */
2126 if (path_instance) {
2127 /* search for pathinfo with correct path_instance */
2128 for (pip = head;
2129 pip && (mdi_pi_get_path_instance(pip) != path_instance);
2130 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2131 ;
2132
2133 /* If path can't be selected then MDI_NOPATH is returned. */
2134 if (pip == NULL) {
2135 MDI_CLIENT_UNLOCK(ct);
2136 return (MDI_NOPATH);
2137 }
2138
2139 /*
2140 * Verify state of path. When asked to select a specific
2141 * path_instance, we select the requested path in any
2142 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2143 * We don't however select paths where the pHCI has detached.
2144 * NOTE: last pathinfo node of an opened client device may
2145 * exist in an OFFLINE state after the pHCI associated with
2146 * that path has detached (but pi_phci will be NULL if that
2147 * has occurred).
2148 */
2149 MDI_PI_LOCK(pip);
2150 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2151 (MDI_PI(pip)->pi_phci == NULL)) {
2152 MDI_PI_UNLOCK(pip);
2153 MDI_CLIENT_UNLOCK(ct);
2154 return (MDI_FAILURE);
2155 }
2156
2157 /* Return MDI_BUSY if we have a transient condition */
2158 if (MDI_PI_IS_TRANSIENT(pip)) {
2159 MDI_PI_UNLOCK(pip);
2160 MDI_CLIENT_UNLOCK(ct);
2161 return (MDI_BUSY);
2162 }
2163
2164 /*
2165 * Return the path in hold state. Caller should release the
2166 * lock by calling mdi_rele_path()
2167 */
2168 MDI_PI_HOLD(pip);
2169 MDI_PI_UNLOCK(pip);
2170 *ret_pip = pip;
2171 MDI_CLIENT_UNLOCK(ct);
2172 return (MDI_SUCCESS);
2173 }
2174
2175 /*
2176 * for non default behavior, bypass current
2177 * load balancing policy and always use LOAD_BALANCE_RR
2178 * except that the start point will be adjusted based
2179 * on the provided start_pip
2180 */
2181 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2182
2183 switch (lbp) {
2184 case LOAD_BALANCE_NONE:
2185 /*
2186 * Load balancing is None or Alternate path mode
2187 * Start looking for a online mdi_pathinfo node starting from
2188 * last known selected path
2189 */
2190 preferred = 1;
2191 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2192 if (pip == NULL) {
2193 pip = head;
2194 }
2195 start = pip;
2196 do {
2197 MDI_PI_LOCK(pip);
2198 /*
2199 * No need to explicitly check if the path is disabled.
2200 * Since we are checking for state == ONLINE and the
2201 * same variable is used for DISABLE/ENABLE information.
2202 */
2203 if ((MDI_PI(pip)->pi_state ==
2204 MDI_PATHINFO_STATE_ONLINE) &&
2205 preferred == MDI_PI(pip)->pi_preferred) {
2206 /*
2207 * Return the path in hold state. Caller should
2208 * release the lock by calling mdi_rele_path()
2209 */
2210 MDI_PI_HOLD(pip);
2211 MDI_PI_UNLOCK(pip);
2212 ct->ct_path_last = pip;
2213 *ret_pip = pip;
2214 MDI_CLIENT_UNLOCK(ct);
2215 return (MDI_SUCCESS);
2216 }
2217
2218 /*
2219 * Path is busy.
2220 */
2221 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2222 MDI_PI_IS_TRANSIENT(pip))
2223 retry = 1;
2224 /*
2225 * Keep looking for a next available online path
2226 */
2227 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2228 if (next == NULL) {
2229 next = head;
2230 }
2231 MDI_PI_UNLOCK(pip);
2232 pip = next;
2233 if (start == pip && preferred) {
2234 preferred = 0;
2235 } else if (start == pip && !preferred) {
2236 cont = 0;
2237 }
2238 } while (cont);
2239 break;
2240
2241 case LOAD_BALANCE_LBA:
2242 /*
2243 * Make sure we are looking
2244 * for an online path. Otherwise, if it is for a STANDBY
2245 * path request, it will go through and fetch an ONLINE
2246 * path which is not desirable.
2247 */
2248 if ((ct->ct_lb_args != NULL) &&
2249 (ct->ct_lb_args->region_size) && bp &&
2250 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2251 if (i_mdi_lba_lb(ct, ret_pip, bp)
2252 == MDI_SUCCESS) {
2253 MDI_CLIENT_UNLOCK(ct);
2254 return (MDI_SUCCESS);
2255 }
2256 }
2257 /* FALLTHROUGH */
2258 case LOAD_BALANCE_RR:
2259 /*
2260 * Load balancing is Round Robin. Start looking for a online
2261 * mdi_pathinfo node starting from last known selected path
2262 * as the start point. If override flags are specified,
2263 * process accordingly.
2264 * If the search is already in effect(start_pip not null),
2265 * then lets just use the same path preference to continue the
2266 * traversal.
2267 */
2268
2269 if (start_pip != NULL) {
2270 preferred = MDI_PI(start_pip)->pi_preferred;
2271 } else {
2272 preferred = 1;
2273 }
2274
2275 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2276 if (start == NULL) {
2277 pip = head;
2278 } else {
2279 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2280 if (pip == NULL) {
2281 if ( flags & MDI_SELECT_NO_PREFERRED) {
2282 /*
2283 * Return since we hit the end of list
2284 */
2285 MDI_CLIENT_UNLOCK(ct);
2286 return (MDI_NOPATH);
2287 }
2288
2289 if (!sb) {
2290 if (preferred == 0) {
2291 /*
2292 * Looks like we have completed
2293 * the traversal as preferred
2294 * value is 0. Time to bail out.
2295 */
2296 *ret_pip = NULL;
2297 MDI_CLIENT_UNLOCK(ct);
2298 return (MDI_NOPATH);
2299 } else {
2300 /*
2301 * Looks like we reached the
2302 * end of the list. Lets enable
2303 * traversal of non preferred
2304 * paths.
2305 */
2306 preferred = 0;
2307 }
2308 }
2309 pip = head;
2310 }
2311 }
2312 start = pip;
2313 do {
2314 MDI_PI_LOCK(pip);
2315 if (sb) {
2316 cond = ((MDI_PI(pip)->pi_state ==
2317 MDI_PATHINFO_STATE_ONLINE &&
2318 MDI_PI(pip)->pi_preferred ==
2319 preferred) ? 1 : 0);
2320 } else {
2321 if (flags == MDI_SELECT_ONLINE_PATH) {
2322 cond = ((MDI_PI(pip)->pi_state ==
2323 MDI_PATHINFO_STATE_ONLINE &&
2324 MDI_PI(pip)->pi_preferred ==
2325 preferred) ? 1 : 0);
2326 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2327 cond = ((MDI_PI(pip)->pi_state ==
2328 MDI_PATHINFO_STATE_STANDBY &&
2329 MDI_PI(pip)->pi_preferred ==
2330 preferred) ? 1 : 0);
2331 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2332 MDI_SELECT_STANDBY_PATH)) {
2333 cond = (((MDI_PI(pip)->pi_state ==
2334 MDI_PATHINFO_STATE_ONLINE ||
2335 (MDI_PI(pip)->pi_state ==
2336 MDI_PATHINFO_STATE_STANDBY)) &&
2337 MDI_PI(pip)->pi_preferred ==
2338 preferred) ? 1 : 0);
2339 } else if (flags ==
2340 (MDI_SELECT_STANDBY_PATH |
2341 MDI_SELECT_ONLINE_PATH |
2342 MDI_SELECT_USER_DISABLE_PATH)) {
2343 cond = (((MDI_PI(pip)->pi_state ==
2344 MDI_PATHINFO_STATE_ONLINE ||
2345 (MDI_PI(pip)->pi_state ==
2346 MDI_PATHINFO_STATE_STANDBY) ||
2347 (MDI_PI(pip)->pi_state ==
2348 (MDI_PATHINFO_STATE_ONLINE|
2349 MDI_PATHINFO_STATE_USER_DISABLE)) ||
2350 (MDI_PI(pip)->pi_state ==
2351 (MDI_PATHINFO_STATE_STANDBY |
2352 MDI_PATHINFO_STATE_USER_DISABLE)))&&
2353 MDI_PI(pip)->pi_preferred ==
2354 preferred) ? 1 : 0);
2355 } else if (flags ==
2356 (MDI_SELECT_STANDBY_PATH |
2357 MDI_SELECT_ONLINE_PATH |
2358 MDI_SELECT_NO_PREFERRED)) {
2359 cond = (((MDI_PI(pip)->pi_state ==
2360 MDI_PATHINFO_STATE_ONLINE) ||
2361 (MDI_PI(pip)->pi_state ==
2362 MDI_PATHINFO_STATE_STANDBY))
2363 ? 1 : 0);
2364 } else {
2365 cond = 0;
2366 }
2367 }
2368 /*
2369 * No need to explicitly check if the path is disabled.
2370 * Since we are checking for state == ONLINE and the
2371 * same variable is used for DISABLE/ENABLE information.
2372 */
2373 if (cond) {
2374 /*
2375 * Return the path in hold state. Caller should
2376 * release the lock by calling mdi_rele_path()
2377 */
2378 MDI_PI_HOLD(pip);
2379 MDI_PI_UNLOCK(pip);
2380 if (sb)
2381 ct->ct_path_last = pip;
2382 *ret_pip = pip;
2383 MDI_CLIENT_UNLOCK(ct);
2384 return (MDI_SUCCESS);
2385 }
2386 /*
2387 * Path is busy.
2388 */
2389 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2390 MDI_PI_IS_TRANSIENT(pip))
2391 retry = 1;
2392
2393 /*
2394 * Keep looking for a next available online path
2395 */
2396 do_again:
2397 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2398 if (next == NULL) {
2399 if ( flags & MDI_SELECT_NO_PREFERRED) {
2400 /*
2401 * Bail out since we hit the end of list
2402 */
2403 MDI_PI_UNLOCK(pip);
2404 break;
2405 }
2406
2407 if (!sb) {
2408 if (preferred == 1) {
2409 /*
2410 * Looks like we reached the
2411 * end of the list. Lets enable
2412 * traversal of non preferred
2413 * paths.
2414 */
2415 preferred = 0;
2416 next = head;
2417 } else {
2418 /*
2419 * We have done both the passes
2420 * Preferred as well as for
2421 * Non-preferred. Bail out now.
2422 */
2423 cont = 0;
2424 }
2425 } else {
2426 /*
2427 * Standard behavior case.
2428 */
2429 next = head;
2430 }
2431 }
2432 MDI_PI_UNLOCK(pip);
2433 if (cont == 0) {
2434 break;
2435 }
2436 pip = next;
2437
2438 if (!sb) {
2439 /*
2440 * We need to handle the selection of
2441 * non-preferred path in the following
2442 * case:
2443 *
2444 * +------+ +------+ +------+ +-----+
2445 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2446 * +------+ +------+ +------+ +-----+
2447 *
2448 * If we start the search with B, we need to
2449 * skip beyond B to pick C which is non -
2450 * preferred in the second pass. The following
2451 * test, if true, will allow us to skip over
2452 * the 'start'(B in the example) to select
2453 * other non preferred elements.
2454 */
2455 if ((start_pip != NULL) && (start_pip == pip) &&
2456 (MDI_PI(start_pip)->pi_preferred
2457 != preferred)) {
2458 /*
2459 * try again after going past the start
2460 * pip
2461 */
2462 MDI_PI_LOCK(pip);
2463 goto do_again;
2464 }
2465 } else {
2466 /*
2467 * Standard behavior case
2468 */
2469 if (start == pip && preferred) {
2470 /* look for nonpreferred paths */
2471 preferred = 0;
2472 } else if (start == pip && !preferred) {
2473 /*
2474 * Exit condition
2475 */
2476 cont = 0;
2477 }
2478 }
2479 } while (cont);
2480 break;
2481 }
2482
2483 MDI_CLIENT_UNLOCK(ct);
2484 if (retry == 1) {
2485 return (MDI_BUSY);
2486 } else {
2487 return (MDI_NOPATH);
2488 }
2489 }
2490
2491 /*
2492 * For a client, return the next available path to any phci
2493 *
2494 * Note:
2495 * Caller should hold the branch's devinfo node to get a consistent
2496 * snap shot of the mdi_pathinfo nodes.
2497 *
 *		Please note that even if the list is stable, the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get stable property values.
2502 *
2503 * If there is a need to use the nodes beyond the hold of the
2504 * devinfo node period (For ex. I/O), then mdi_pathinfo node
2505 * need to be held against unexpected removal by calling
2506 * mdi_hold_path() and should be released by calling
2507 * mdi_rele_path() on completion.
2508 */
2509 mdi_pathinfo_t *
2510 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2511 {
2512 mdi_client_t *ct;
2513
2514 if (!MDI_CLIENT(ct_dip))
2515 return (NULL);
2516
2517 /*
2518 * Walk through client link
2519 */
2520 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2521 ASSERT(ct != NULL);
2522
2523 if (pip == NULL)
2524 return ((mdi_pathinfo_t *)ct->ct_path_head);
2525
2526 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2527 }
2528
2529 /*
2530 * For a phci, return the next available path to any client
2531 * Note: ditto mdi_get_next_phci_path()
2532 */
2533 mdi_pathinfo_t *
2534 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2535 {
2536 mdi_phci_t *ph;
2537
2538 if (!MDI_PHCI(ph_dip))
2539 return (NULL);
2540
2541 /*
2542 * Walk through pHCI link
2543 */
2544 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2545 ASSERT(ph != NULL);
2546
2547 if (pip == NULL)
2548 return ((mdi_pathinfo_t *)ph->ph_path_head);
2549
2550 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2551 }
2552
2553 /*
2554 * mdi_hold_path():
2555 * Hold the mdi_pathinfo node against unwanted unexpected free.
2556 * Return Values:
2557 * None
2558 */
2559 void
2560 mdi_hold_path(mdi_pathinfo_t *pip)
2561 {
2562 if (pip) {
2563 MDI_PI_LOCK(pip);
2564 MDI_PI_HOLD(pip);
2565 MDI_PI_UNLOCK(pip);
2566 }
2567 }
2568
2569
2570 /*
2571 * mdi_rele_path():
2572 * Release the mdi_pathinfo node which was selected
2573 * through mdi_select_path() mechanism or manually held by
2574 * calling mdi_hold_path().
2575 * Return Values:
2576 * None
2577 */
2578 void
2579 mdi_rele_path(mdi_pathinfo_t *pip)
2580 {
2581 if (pip) {
2582 MDI_PI_LOCK(pip);
2583 MDI_PI_RELE(pip);
2584 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2585 cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2586 }
2587 MDI_PI_UNLOCK(pip);
2588 }
2589 }
2590
2591 /*
2592 * mdi_pi_lock():
2593 * Lock the mdi_pathinfo node.
2594 * Note:
2595 * The caller should release the lock by calling mdi_pi_unlock()
2596 */
2597 void
2598 mdi_pi_lock(mdi_pathinfo_t *pip)
2599 {
2600 ASSERT(pip != NULL);
2601 if (pip) {
2602 MDI_PI_LOCK(pip);
2603 }
2604 }
2605
2606
2607 /*
2608 * mdi_pi_unlock():
2609 * Unlock the mdi_pathinfo node.
2610 * Note:
2611 * The mdi_pathinfo node should have been locked with mdi_pi_lock()
2612 */
2613 void
2614 mdi_pi_unlock(mdi_pathinfo_t *pip)
2615 {
2616 ASSERT(pip != NULL);
2617 if (pip) {
2618 MDI_PI_UNLOCK(pip);
2619 }
2620 }
2621
2622 /*
2623 * mdi_pi_find():
2624 * Search the list of mdi_pathinfo nodes attached to the
2625 * pHCI/Client device node whose path address matches "paddr".
2626 * Returns a pointer to the mdi_pathinfo node if a matching node is
2627 * found.
2628 * Return Values:
2629 * mdi_pathinfo node handle
2630 * NULL
2631 * Notes:
2632 * Caller need not hold any locks to call this function.
2633 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
	/* A pHCI devinfo node and a path unit address are mandatory. */
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (MDI_WARN, pdip,
			    "offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* Linear scan of the pHCI path list for a unit-address match */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		/* pip is NULL here if no matching unit address was found */
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "client not found for caddr @%s", caddr ? caddr : ""));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order (vHCI client lock before per-client lock)
	 * while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address; the node must also belong to
		 * the requesting pHCI since a client may have paths
		 * through several pHCIs.
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	/* pip is NULL here if no matching node was found */
	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
	return (pip);
}
2751
2752 /*
2753 * mdi_pi_alloc():
2754 * Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path, is capable of having properties attached,
 *		and can be passed to mdi_pi_online() to fully attach and
 *		online the path and client device node.
2759 * The mdi_pathinfo node returned by this function must be
2760 * destroyed using mdi_pi_free() if the path is no longer
2761 * operational or if the caller fails to attach a client device
2762 * node when calling mdi_pi_online(). The framework will not free
2763 * the resources allocated.
2764 * This function can be called from both interrupt and kernel
2765 * contexts. DDI_NOSLEEP flag should be used while calling
2766 * from interrupt contexts.
2767 * Return Values:
2768 * MDI_SUCCESS
2769 * MDI_FAILURE
2770 * MDI_NOMEM
2771 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;	/* default covers the goto-fail path */
	int		path_allocated = 0;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "cname %s: caddr@%s paddr@%s",
	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/*
	 * Mark the pHCI unstable for the duration of the allocation;
	 * it is marked stable again on both the success and fail exits.
	 */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM; pip stays NULL */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/*
	 * Check whether a pathinfo node for this (pHCI, unit address)
	 * pair already exists on the client path list.
	 */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	/* On failure pip is NULL, so *ret_pip is cleared for the caller. */
	*ret_pip = pip;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}
2908
2909 /*ARGSUSED*/
2910 int
2911 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2912 int flags, mdi_pathinfo_t **ret_pip)
2913 {
2914 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2915 flags, ret_pip));
2916 }
2917
2918 /*
2919 * i_mdi_pi_alloc():
2920 * Allocate a mdi_pathinfo node and add to the pHCI path list
2921 * Return Values:
2922 * mdi_pathinfo
2923 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
	char		*path_persistent;
	int		path_instance;
	mod_hash_val_t	hv;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	/* New node starts life in INIT state and is marked transient. */
	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit the pHCI's current disable conditions. */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);

	/*
	 * We form the "path" to the pathinfo node, and see if we have
	 * already allocated a 'path_instance' for that "path". If so,
	 * we use the already allocated 'path_instance'. If not, we
	 * allocate a new 'path_instance' and associate it with a copy of
	 * the "path" string (which is never freed). The association
	 * between a 'path_instance' this "path" string persists until
	 * reboot.
	 *
	 * mdi_pathmap_mutex also serializes use of the static 'path'
	 * buffer and the mdi_pathmap_instance counter.
	 */
	mutex_enter(&mdi_pathmap_mutex);
	(void) ddi_pathname(ph->ph_dip, path);
	(void) sprintf(path + strlen(path), "/%s@%s",
	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
	if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
		path_instance = (uint_t)(intptr_t)hv;
	} else {
		/* allocate a new 'path_instance' and persistent "path" */
		path_instance = mdi_pathmap_instance++;
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		/* record both directions of the path <-> instance mapping */
		(void) mod_hash_insert(mdi_pathmap_bypath,
		    (mod_hash_key_t)path_persistent,
		    (mod_hash_val_t)(intptr_t)path_instance);
		(void) mod_hash_insert(mdi_pathmap_byinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);

		/* create shortpath name */
		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);
	}
	mutex_exit(&mdi_pathmap_mutex);
	MDI_PI(pip)->pi_path_instance = path_instance;

	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	/* Link the new node onto both the pHCI and the client path lists. */
	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	return (pip);
}
3028
3029 /*
3030 * mdi_pi_pathname_by_instance():
3031 * Lookup of "path" by 'path_instance'. Return "path".
3032 * NOTE: returned "path" remains valid forever (until reboot).
3033 */
3034 char *
3035 mdi_pi_pathname_by_instance(int path_instance)
3036 {
3037 char *path;
3038 mod_hash_val_t hv;
3039
3040 /* mdi_pathmap lookup of "path" by 'path_instance' */
3041 mutex_enter(&mdi_pathmap_mutex);
3042 if (mod_hash_find(mdi_pathmap_byinstance,
3043 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3044 path = (char *)hv;
3045 else
3046 path = NULL;
3047 mutex_exit(&mdi_pathmap_mutex);
3048 return (path);
3049 }
3050
3051 /*
3052 * mdi_pi_spathname_by_instance():
3053 * Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3054 * NOTE: returned "shortpath" remains valid forever (until reboot).
3055 */
3056 char *
3057 mdi_pi_spathname_by_instance(int path_instance)
3058 {
3059 char *path;
3060 mod_hash_val_t hv;
3061
3062 /* mdi_pathmap lookup of "path" by 'path_instance' */
3063 mutex_enter(&mdi_pathmap_mutex);
3064 if (mod_hash_find(mdi_pathmap_sbyinstance,
3065 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3066 path = (char *)hv;
3067 else
3068 path = NULL;
3069 mutex_exit(&mdi_pathmap_mutex);
3070 return (path);
3071 }
3072
3073
3074 /*
3075 * i_mdi_phci_add_path():
3076 * Add a mdi_pathinfo node to pHCI list.
3077 * Notes:
3078 * Caller should per-pHCI mutex
3079 */
3080 static void
3081 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3082 {
3083 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3084
3085 MDI_PHCI_LOCK(ph);
3086 if (ph->ph_path_head == NULL) {
3087 ph->ph_path_head = pip;
3088 } else {
3089 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3090 }
3091 ph->ph_path_tail = pip;
3092 ph->ph_path_count++;
3093 MDI_PHCI_UNLOCK(ph);
3094 }
3095
3096 /*
3097 * i_mdi_client_add_path():
3098 * Add mdi_pathinfo node to client list
3099 */
3100 static void
3101 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3102 {
3103 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3104
3105 MDI_CLIENT_LOCK(ct);
3106 if (ct->ct_path_head == NULL) {
3107 ct->ct_path_head = pip;
3108 } else {
3109 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3110 }
3111 ct->ct_path_tail = pip;
3112 ct->ct_path_count++;
3113 MDI_CLIENT_UNLOCK(ct);
3114 }
3115
3116 /*
3117 * mdi_pi_free():
3118 * Free the mdi_pathinfo node and also client device node if this
3119 * is the last path to the device
3120 * Return Values:
3121 * MDI_SUCCESS
3122 * MDI_FAILURE
3123 * MDI_BUSY
3124 */
3125 /*ARGSUSED*/
3126 int
3127 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3128 {
3129 int rv;
3130 mdi_vhci_t *vh;
3131 mdi_phci_t *ph;
3132 mdi_client_t *ct;
3133 int (*f)();
3134 int client_held = 0;
3135
3136 MDI_PI_LOCK(pip);
3137 ph = MDI_PI(pip)->pi_phci;
3138 ASSERT(ph != NULL);
3139 if (ph == NULL) {
3140 /*
3141 * Invalid pHCI device, return failure
3142 */
3143 MDI_DEBUG(1, (MDI_WARN, NULL,
3144 "!invalid pHCI: pip %s %p",
3145 mdi_pi_spathname(pip), (void *)pip));
3146 MDI_PI_UNLOCK(pip);
3147 return (MDI_FAILURE);
3148 }
3149
3150 vh = ph->ph_vhci;
3151 ASSERT(vh != NULL);
3152 if (vh == NULL) {
3153 /* Invalid pHCI device, return failure */
3154 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3155 "!invalid vHCI: pip %s %p",
3156 mdi_pi_spathname(pip), (void *)pip));
3157 MDI_PI_UNLOCK(pip);
3158 return (MDI_FAILURE);
3159 }
3160
3161 ct = MDI_PI(pip)->pi_client;
3162 ASSERT(ct != NULL);
3163 if (ct == NULL) {
3164 /*
3165 * Invalid Client device, return failure
3166 */
3167 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3168 "!invalid client: pip %s %p",
3169 mdi_pi_spathname(pip), (void *)pip));
3170 MDI_PI_UNLOCK(pip);
3171 return (MDI_FAILURE);
3172 }
3173
3174 /*
3175 * Check to see for busy condition. A mdi_pathinfo can only be freed
3176 * if the node state is either offline or init and the reference count
3177 * is zero.
3178 */
3179 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3180 MDI_PI_IS_INITING(pip))) {
3181 /*
3182 * Node is busy
3183 */
3184 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3185 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3186 MDI_PI_UNLOCK(pip);
3187 return (MDI_BUSY);
3188 }
3189
3190 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3191 /*
3192 * Give a chance for pending I/Os to complete.
3193 */
3194 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3195 "!%d cmds still pending on path: %s %p",
3196 MDI_PI(pip)->pi_ref_cnt,
3197 mdi_pi_spathname(pip), (void *)pip));
3198 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3199 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3200 TR_CLOCK_TICK) == -1) {
3201 /*
3202 * The timeout time reached without ref_cnt being zero
3203 * being signaled.
3204 */
3205 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3206 "!Timeout reached on path %s %p without the cond",
3207 mdi_pi_spathname(pip), (void *)pip));
3208 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3209 "!%d cmds still pending on path %s %p",
3210 MDI_PI(pip)->pi_ref_cnt,
3211 mdi_pi_spathname(pip), (void *)pip));
3212 MDI_PI_UNLOCK(pip);
3213 return (MDI_BUSY);
3214 }
3215 }
3216 if (MDI_PI(pip)->pi_pm_held) {
3217 client_held = 1;
3218 }
3219 MDI_PI_UNLOCK(pip);
3220
3221 vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3222
3223 MDI_CLIENT_LOCK(ct);
3224
3225 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3226 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3227
3228 /*
3229 * Wait till failover is complete before removing this node.
3230 */
3231 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3232 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3233
3234 MDI_CLIENT_UNLOCK(ct);
3235 MDI_VHCI_CLIENT_LOCK(vh);
3236 MDI_CLIENT_LOCK(ct);
3237 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3238
3239 if (!MDI_PI_IS_INITING(pip)) {
3240 f = vh->vh_ops->vo_pi_uninit;
3241 if (f != NULL) {
3242 rv = (*f)(vh->vh_dip, pip, 0);
3243 }
3244 } else
3245 rv = MDI_SUCCESS;
3246
3247 /*
3248 * If vo_pi_uninit() completed successfully.
3249 */
3250 if (rv == MDI_SUCCESS) {
3251 if (client_held) {
3252 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3253 "i_mdi_pm_rele_client\n"));
3254 i_mdi_pm_rele_client(ct, 1);
3255 }
3256 i_mdi_pi_free(ph, pip, ct);
3257 if (ct->ct_path_count == 0) {
3258 /*
3259 * Client lost its last path.
3260 * Clean up the client device
3261 */
3262 ct->ct_flags |= flags;
3263 MDI_CLIENT_UNLOCK(ct);
3264 (void) i_mdi_client_free(ct->ct_vhci, ct);
3265 MDI_VHCI_CLIENT_UNLOCK(vh);
3266 return (rv);
3267 }
3268 }
3269 MDI_CLIENT_UNLOCK(ct);
3270 MDI_VHCI_CLIENT_UNLOCK(vh);
3271
3272 if (rv == MDI_FAILURE)
3273 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3274
3275 return (rv);
3276 }
3277
3278 /*
3279 * i_mdi_pi_free():
3280 * Free the mdi_pathinfo node
3281 */
3282 static void
3283 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3284 {
3285 int ct_circular;
3286 int ph_circular;
3287
3288 ASSERT(MDI_CLIENT_LOCKED(ct));
3289
3290 /*
3291 * remove any per-path kstats
3292 */
3293 i_mdi_pi_kstat_destroy(pip);
3294
3295 /* See comments in i_mdi_pi_alloc() */
3296 ndi_devi_enter(ct->ct_dip, &ct_circular);
3297 ndi_devi_enter(ph->ph_dip, &ph_circular);
3298
3299 i_mdi_client_remove_path(ct, pip);
3300 i_mdi_phci_remove_path(ph, pip);
3301
3302 ndi_devi_exit(ph->ph_dip, ph_circular);
3303 ndi_devi_exit(ct->ct_dip, ct_circular);
3304
3305 mutex_destroy(&MDI_PI(pip)->pi_mutex);
3306 cv_destroy(&MDI_PI(pip)->pi_state_cv);
3307 cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3308 if (MDI_PI(pip)->pi_addr) {
3309 kmem_free(MDI_PI(pip)->pi_addr,
3310 strlen(MDI_PI(pip)->pi_addr) + 1);
3311 MDI_PI(pip)->pi_addr = NULL;
3312 }
3313
3314 if (MDI_PI(pip)->pi_prop) {
3315 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3316 MDI_PI(pip)->pi_prop = NULL;
3317 }
3318 kmem_free(pip, sizeof (struct mdi_pathinfo));
3319 }
3320
3321
3322 /*
3323 * i_mdi_phci_remove_path():
3324 * Remove a mdi_pathinfo node from pHCI list.
3325 * Notes:
3326 * Caller should hold per-pHCI mutex
3327 */
3328 static void
3329 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3330 {
3331 mdi_pathinfo_t *prev = NULL;
3332 mdi_pathinfo_t *path = NULL;
3333
3334 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3335
3336 MDI_PHCI_LOCK(ph);
3337 path = ph->ph_path_head;
3338 while (path != NULL) {
3339 if (path == pip) {
3340 break;
3341 }
3342 prev = path;
3343 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3344 }
3345
3346 if (path) {
3347 ph->ph_path_count--;
3348 if (prev) {
3349 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3350 } else {
3351 ph->ph_path_head =
3352 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3353 }
3354 if (ph->ph_path_tail == path) {
3355 ph->ph_path_tail = prev;
3356 }
3357 }
3358
3359 /*
3360 * Clear the pHCI link
3361 */
3362 MDI_PI(pip)->pi_phci_link = NULL;
3363 MDI_PI(pip)->pi_phci = NULL;
3364 MDI_PHCI_UNLOCK(ph);
3365 }
3366
3367 /*
3368 * i_mdi_client_remove_path():
3369 * Remove a mdi_pathinfo node from client path list.
3370 */
3371 static void
3372 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3373 {
3374 mdi_pathinfo_t *prev = NULL;
3375 mdi_pathinfo_t *path;
3376
3377 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3378
3379 ASSERT(MDI_CLIENT_LOCKED(ct));
3380 path = ct->ct_path_head;
3381 while (path != NULL) {
3382 if (path == pip) {
3383 break;
3384 }
3385 prev = path;
3386 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3387 }
3388
3389 if (path) {
3390 ct->ct_path_count--;
3391 if (prev) {
3392 MDI_PI(prev)->pi_client_link =
3393 MDI_PI(path)->pi_client_link;
3394 } else {
3395 ct->ct_path_head =
3396 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3397 }
3398 if (ct->ct_path_tail == path) {
3399 ct->ct_path_tail = prev;
3400 }
3401 if (ct->ct_path_last == path) {
3402 ct->ct_path_last = ct->ct_path_head;
3403 }
3404 }
3405 MDI_PI(pip)->pi_client_link = NULL;
3406 MDI_PI(pip)->pi_client = NULL;
3407 }
3408
3409 /*
3410 * i_mdi_pi_state_change():
3411 * online a mdi_pathinfo node
3412 *
3413 * Return Values:
3414 * MDI_SUCCESS
3415 * MDI_FAILURE
3416 */
3417 /*ARGSUSED*/
3418 static int
3419 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3420 {
3421 int rv = MDI_SUCCESS;
3422 mdi_vhci_t *vh;
3423 mdi_phci_t *ph;
3424 mdi_client_t *ct;
3425 int (*f)();
3426 dev_info_t *cdip;
3427
3428 MDI_PI_LOCK(pip);
3429
3430 ph = MDI_PI(pip)->pi_phci;
3431 ASSERT(ph);
3432 if (ph == NULL) {
3433 /*
3434 * Invalid pHCI device, fail the request
3435 */
3436 MDI_PI_UNLOCK(pip);
3437 MDI_DEBUG(1, (MDI_WARN, NULL,
3438 "!invalid phci: pip %s %p",
3439 mdi_pi_spathname(pip), (void *)pip));
3440 return (MDI_FAILURE);
3441 }
3442
3443 vh = ph->ph_vhci;
3444 ASSERT(vh);
3445 if (vh == NULL) {
3446 /*
3447 * Invalid vHCI device, fail the request
3448 */
3449 MDI_PI_UNLOCK(pip);
3450 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3451 "!invalid vhci: pip %s %p",
3452 mdi_pi_spathname(pip), (void *)pip));
3453 return (MDI_FAILURE);
3454 }
3455
3456 ct = MDI_PI(pip)->pi_client;
3457 ASSERT(ct != NULL);
3458 if (ct == NULL) {
3459 /*
3460 * Invalid client device, fail the request
3461 */
3462 MDI_PI_UNLOCK(pip);
3463 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3464 "!invalid client: pip %s %p",
3465 mdi_pi_spathname(pip), (void *)pip));
3466 return (MDI_FAILURE);
3467 }
3468
3469 /*
3470 * If this path has not been initialized yet, Callback vHCI driver's
3471 * pathinfo node initialize entry point
3472 */
3473
3474 if (MDI_PI_IS_INITING(pip)) {
3475 MDI_PI_UNLOCK(pip);
3476 f = vh->vh_ops->vo_pi_init;
3477 if (f != NULL) {
3478 rv = (*f)(vh->vh_dip, pip, 0);
3479 if (rv != MDI_SUCCESS) {
3480 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3481 "!vo_pi_init failed: vHCI %p, pip %s %p",
3482 (void *)vh, mdi_pi_spathname(pip),
3483 (void *)pip));
3484 return (MDI_FAILURE);
3485 }
3486 }
3487 MDI_PI_LOCK(pip);
3488 MDI_PI_CLEAR_TRANSIENT(pip);
3489 }
3490
3491 /*
3492 * Do not allow state transition when pHCI is in offline/suspended
3493 * states
3494 */
3495 i_mdi_phci_lock(ph, pip);
3496 if (MDI_PHCI_IS_READY(ph) == 0) {
3497 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3498 "!pHCI not ready, pHCI=%p", (void *)ph));
3499 MDI_PI_UNLOCK(pip);
3500 i_mdi_phci_unlock(ph);
3501 return (MDI_BUSY);
3502 }
3503 MDI_PHCI_UNSTABLE(ph);
3504 i_mdi_phci_unlock(ph);
3505
3506 /*
3507 * Check if mdi_pathinfo state is in transient state.
3508 * If yes, offlining is in progress and wait till transient state is
3509 * cleared.
3510 */
3511 if (MDI_PI_IS_TRANSIENT(pip)) {
3512 while (MDI_PI_IS_TRANSIENT(pip)) {
3513 cv_wait(&MDI_PI(pip)->pi_state_cv,
3514 &MDI_PI(pip)->pi_mutex);
3515 }
3516 }
3517
3518 /*
3519 * Grab the client lock in reverse order sequence and release the
3520 * mdi_pathinfo mutex.
3521 */
3522 i_mdi_client_lock(ct, pip);
3523 MDI_PI_UNLOCK(pip);
3524
3525 /*
3526 * Wait till failover state is cleared
3527 */
3528 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3529 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3530
3531 /*
3532 * Mark the mdi_pathinfo node state as transient
3533 */
3534 MDI_PI_LOCK(pip);
3535 switch (state) {
3536 case MDI_PATHINFO_STATE_ONLINE:
3537 MDI_PI_SET_ONLINING(pip);
3538 break;
3539
3540 case MDI_PATHINFO_STATE_STANDBY:
3541 MDI_PI_SET_STANDBYING(pip);
3542 break;
3543
3544 case MDI_PATHINFO_STATE_FAULT:
3545 /*
3546 * Mark the pathinfo state as FAULTED
3547 */
3548 MDI_PI_SET_FAULTING(pip);
3549 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3550 break;
3551
3552 case MDI_PATHINFO_STATE_OFFLINE:
3553 /*
3554 * ndi_devi_offline() cannot hold pip or ct locks.
3555 */
3556 MDI_PI_UNLOCK(pip);
3557
3558 /*
3559 * If this is a user initiated path online->offline operation
3560 * who's success would transition a client from DEGRADED to
3561 * FAILED then only proceed if we can offline the client first.
3562 */
3563 cdip = ct->ct_dip;
3564 if ((flag & NDI_USER_REQ) &&
3565 MDI_PI_IS_ONLINE(pip) &&
3566 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3567 i_mdi_client_unlock(ct);
3568 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3569 if (rv != NDI_SUCCESS) {
3570 /*
3571 * Convert to MDI error code
3572 */
3573 switch (rv) {
3574 case NDI_BUSY:
3575 rv = MDI_BUSY;
3576 break;
3577 default:
3578 rv = MDI_FAILURE;
3579 break;
3580 }
3581 goto state_change_exit;
3582 } else {
3583 i_mdi_client_lock(ct, NULL);
3584 }
3585 }
3586 /*
3587 * Mark the mdi_pathinfo node state as transient
3588 */
3589 MDI_PI_LOCK(pip);
3590 MDI_PI_SET_OFFLINING(pip);
3591 break;
3592 }
3593 MDI_PI_UNLOCK(pip);
3594 MDI_CLIENT_UNSTABLE(ct);
3595 i_mdi_client_unlock(ct);
3596
3597 f = vh->vh_ops->vo_pi_state_change;
3598 if (f != NULL)
3599 rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3600
3601 MDI_CLIENT_LOCK(ct);
3602 MDI_PI_LOCK(pip);
3603 if (rv == MDI_NOT_SUPPORTED) {
3604 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3605 }
3606 if (rv != MDI_SUCCESS) {
3607 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3608 "vo_pi_state_change failed: rv %x", rv));
3609 }
3610 if (MDI_PI_IS_TRANSIENT(pip)) {
3611 if (rv == MDI_SUCCESS) {
3612 MDI_PI_CLEAR_TRANSIENT(pip);
3613 } else {
3614 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3615 }
3616 }
3617
3618 /*
3619 * Wake anyone waiting for this mdi_pathinfo node
3620 */
3621 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3622 MDI_PI_UNLOCK(pip);
3623
3624 /*
3625 * Mark the client device as stable
3626 */
3627 MDI_CLIENT_STABLE(ct);
3628 if (rv == MDI_SUCCESS) {
3629 if (ct->ct_unstable == 0) {
3630 cdip = ct->ct_dip;
3631
3632 /*
3633 * Onlining the mdi_pathinfo node will impact the
3634 * client state Update the client and dev_info node
3635 * state accordingly
3636 */
3637 rv = NDI_SUCCESS;
3638 i_mdi_client_update_state(ct);
3639 switch (MDI_CLIENT_STATE(ct)) {
3640 case MDI_CLIENT_STATE_OPTIMAL:
3641 case MDI_CLIENT_STATE_DEGRADED:
3642 if (cdip && !i_ddi_devi_attached(cdip) &&
3643 ((state == MDI_PATHINFO_STATE_ONLINE) ||
3644 (state == MDI_PATHINFO_STATE_STANDBY))) {
3645
3646 /*
3647 * Must do ndi_devi_online() through
3648 * hotplug thread for deferred
3649 * attach mechanism to work
3650 */
3651 MDI_CLIENT_UNLOCK(ct);
3652 rv = ndi_devi_online(cdip, 0);
3653 MDI_CLIENT_LOCK(ct);
3654 if ((rv != NDI_SUCCESS) &&
3655 (MDI_CLIENT_STATE(ct) ==
3656 MDI_CLIENT_STATE_DEGRADED)) {
3657 MDI_DEBUG(1, (MDI_WARN, cdip,
3658 "!ndi_devi_online failed "
3659 "error %x", rv));
3660 }
3661 rv = NDI_SUCCESS;
3662 }
3663 break;
3664
3665 case MDI_CLIENT_STATE_FAILED:
3666 /*
3667 * This is the last path case for
3668 * non-user initiated events.
3669 */
3670 if ((flag & NDI_USER_REQ) ||
3671 cdip == NULL || i_ddi_node_state(cdip) <
3672 DS_INITIALIZED)
3673 break;
3674
3675 MDI_CLIENT_UNLOCK(ct);
3676 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN |
3677 NDI_DEVI_GONE);
3678 MDI_CLIENT_LOCK(ct);
3679
3680 if (rv != NDI_SUCCESS) {
3681 /*
3682 * Reset client flags to online as the
3683 * path could not be offlined.
3684 */
3685 MDI_DEBUG(1, (MDI_WARN, cdip,
3686 "!ndi_devi_offline failed: %d",
3687 rv));
3688 MDI_CLIENT_SET_ONLINE(ct);
3689 }
3690 break;
3691 }
3692 /*
3693 * Convert to MDI error code
3694 */
3695 switch (rv) {
3696 case NDI_SUCCESS:
3697 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3698 i_mdi_report_path_state(ct, pip);
3699 rv = MDI_SUCCESS;
3700 break;
3701 case NDI_BUSY:
3702 rv = MDI_BUSY;
3703 break;
3704 default:
3705 rv = MDI_FAILURE;
3706 break;
3707 }
3708 }
3709 }
3710 MDI_CLIENT_UNLOCK(ct);
3711
3712 state_change_exit:
3713 /*
3714 * Mark the pHCI as stable again.
3715 */
3716 MDI_PHCI_LOCK(ph);
3717 MDI_PHCI_STABLE(ph);
3718 MDI_PHCI_UNLOCK(ph);
3719 return (rv);
3720 }
3721
3722 /*
3723 * mdi_pi_online():
3724 * Place the path_info node in the online state. The path is
3725 * now available to be selected by mdi_select_path() for
3726 * transporting I/O requests to client devices.
3727 * Return Values:
3728 * MDI_SUCCESS
3729 * MDI_FAILURE
3730 */
3731 int
3732 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3733 {
3734 mdi_client_t *ct = MDI_PI(pip)->pi_client;
3735 int client_held = 0;
3736 int rv;
3737
3738 ASSERT(ct != NULL);
3739 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3740 if (rv != MDI_SUCCESS)
3741 return (rv);
3742
3743 MDI_PI_LOCK(pip);
3744 if (MDI_PI(pip)->pi_pm_held == 0) {
3745 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3746 "i_mdi_pm_hold_pip %p", (void *)pip));
3747 i_mdi_pm_hold_pip(pip);
3748 client_held = 1;
3749 }
3750 MDI_PI_UNLOCK(pip);
3751
3752 if (client_held) {
3753 MDI_CLIENT_LOCK(ct);
3754 if (ct->ct_power_cnt == 0) {
3755 rv = i_mdi_power_all_phci(ct);
3756 }
3757
3758 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3759 "i_mdi_pm_hold_client %p", (void *)ct));
3760 i_mdi_pm_hold_client(ct, 1);
3761 MDI_CLIENT_UNLOCK(ct);
3762 }
3763
3764 return (rv);
3765 }
3766
3767 /*
3768 * mdi_pi_standby():
3769 * Place the mdi_pathinfo node in standby state
3770 *
3771 * Return Values:
3772 * MDI_SUCCESS
3773 * MDI_FAILURE
3774 */
3775 int
3776 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3777 {
3778 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3779 }
3780
3781 /*
3782 * mdi_pi_fault():
3783 * Place the mdi_pathinfo node in fault'ed state
3784 * Return Values:
3785 * MDI_SUCCESS
3786 * MDI_FAILURE
3787 */
3788 int
3789 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3790 {
3791 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3792 }
3793
3794 /*
3795 * mdi_pi_offline():
3796 * Offline a mdi_pathinfo node.
3797 * Return Values:
3798 * MDI_SUCCESS
3799 * MDI_FAILURE
3800 */
3801 int
3802 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3803 {
3804 int ret, client_held = 0;
3805 mdi_client_t *ct;
3806
3807 /*
3808 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3809 * used it to mean "user initiated operation" (i.e. devctl). Callers
3810 * should now just use NDI_USER_REQ.
3811 */
3812 if (flags & NDI_DEVI_REMOVE) {
3813 flags &= ~NDI_DEVI_REMOVE;
3814 flags |= NDI_USER_REQ;
3815 }
3816
3817 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3818
3819 if (ret == MDI_SUCCESS) {
3820 MDI_PI_LOCK(pip);
3821 if (MDI_PI(pip)->pi_pm_held) {
3822 client_held = 1;
3823 }
3824 MDI_PI_UNLOCK(pip);
3825
3826 if (client_held) {
3827 ct = MDI_PI(pip)->pi_client;
3828 MDI_CLIENT_LOCK(ct);
3829 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3830 "i_mdi_pm_rele_client\n"));
3831 i_mdi_pm_rele_client(ct, 1);
3832 MDI_CLIENT_UNLOCK(ct);
3833 }
3834 }
3835
3836 return (ret);
3837 }
3838
3839 /*
3840 * i_mdi_pi_offline():
3841 * Offline a mdi_pathinfo node and call the vHCI driver's callback
3842 */
3843 static int
3844 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3845 {
3846 dev_info_t *vdip = NULL;
3847 mdi_vhci_t *vh = NULL;
3848 mdi_client_t *ct = NULL;
3849 int (*f)();
3850 int rv;
3851
3852 MDI_PI_LOCK(pip);
3853 ct = MDI_PI(pip)->pi_client;
3854 ASSERT(ct != NULL);
3855
3856 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3857 /*
3858 * Give a chance for pending I/Os to complete.
3859 */
3860 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3861 "!%d cmds still pending on path %s %p",
3862 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3863 (void *)pip));
3864 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3865 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3866 TR_CLOCK_TICK) == -1) {
3867 /*
3868 * The timeout time reached without ref_cnt being zero
3869 * being signaled.
3870 */
3871 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3872 "!Timeout reached on path %s %p without the cond",
3873 mdi_pi_spathname(pip), (void *)pip));
3874 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3875 "!%d cmds still pending on path %s %p",
3876 MDI_PI(pip)->pi_ref_cnt,
3877 mdi_pi_spathname(pip), (void *)pip));
3878 }
3879 }
3880 vh = ct->ct_vhci;
3881 vdip = vh->vh_dip;
3882
3883 /*
3884 * Notify vHCI that has registered this event
3885 */
3886 ASSERT(vh->vh_ops);
3887 f = vh->vh_ops->vo_pi_state_change;
3888
3889 if (f != NULL) {
3890 MDI_PI_UNLOCK(pip);
3891 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3892 flags)) != MDI_SUCCESS) {
3893 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3894 "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3895 ddi_driver_name(vdip), ddi_get_instance(vdip),
3896 (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3897 }
3898 MDI_PI_LOCK(pip);
3899 }
3900
3901 /*
3902 * Set the mdi_pathinfo node state and clear the transient condition
3903 */
3904 MDI_PI_SET_OFFLINE(pip);
3905 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3906 MDI_PI_UNLOCK(pip);
3907
3908 MDI_CLIENT_LOCK(ct);
3909 if (rv == MDI_SUCCESS) {
3910 if (ct->ct_unstable == 0) {
3911 dev_info_t *cdip = ct->ct_dip;
3912
3913 /*
3914 * Onlining the mdi_pathinfo node will impact the
3915 * client state Update the client and dev_info node
3916 * state accordingly
3917 */
3918 i_mdi_client_update_state(ct);
3919 rv = NDI_SUCCESS;
3920 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3921 if (cdip &&
3922 (i_ddi_node_state(cdip) >=
3923 DS_INITIALIZED)) {
3924 MDI_CLIENT_UNLOCK(ct);
3925 rv = ndi_devi_offline(cdip,
3926 NDI_DEVFS_CLEAN);
3927 MDI_CLIENT_LOCK(ct);
3928 if (rv != NDI_SUCCESS) {
3929 /*
3930 * ndi_devi_offline failed.
3931 * Reset client flags to
3932 * online.
3933 */
3934 MDI_DEBUG(4, (MDI_WARN, cdip,
3935 "ndi_devi_offline failed: "
3936 "error %x", rv));
3937 MDI_CLIENT_SET_ONLINE(ct);
3938 }
3939 }
3940 }
3941 /*
3942 * Convert to MDI error code
3943 */
3944 switch (rv) {
3945 case NDI_SUCCESS:
3946 rv = MDI_SUCCESS;
3947 break;
3948 case NDI_BUSY:
3949 rv = MDI_BUSY;
3950 break;
3951 default:
3952 rv = MDI_FAILURE;
3953 break;
3954 }
3955 }
3956 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3957 i_mdi_report_path_state(ct, pip);
3958 }
3959
3960 MDI_CLIENT_UNLOCK(ct);
3961
3962 /*
3963 * Change in the mdi_pathinfo node state will impact the client state
3964 */
3965 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3966 "ct = %p pip = %p", (void *)ct, (void *)pip));
3967 return (rv);
3968 }
3969
3970 /*
3971 * i_mdi_pi_online():
3972 * Online a mdi_pathinfo node and call the vHCI driver's callback
3973 */
3974 static int
3975 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3976 {
3977 mdi_vhci_t *vh = NULL;
3978 mdi_client_t *ct = NULL;
3979 mdi_phci_t *ph;
3980 int (*f)();
3981 int rv;
3982
3983 MDI_PI_LOCK(pip);
3984 ph = MDI_PI(pip)->pi_phci;
3985 vh = ph->ph_vhci;
3986 ct = MDI_PI(pip)->pi_client;
3987 MDI_PI_SET_ONLINING(pip)
3988 MDI_PI_UNLOCK(pip);
3989 f = vh->vh_ops->vo_pi_state_change;
3990 if (f != NULL)
3991 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
3992 flags);
3993 MDI_CLIENT_LOCK(ct);
3994 MDI_PI_LOCK(pip);
3995 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3996 MDI_PI_UNLOCK(pip);
3997 if (rv == MDI_SUCCESS) {
3998 dev_info_t *cdip = ct->ct_dip;
3999
4000 rv = MDI_SUCCESS;
4001 i_mdi_client_update_state(ct);
4002 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4003 MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4004 if (cdip && !i_ddi_devi_attached(cdip)) {
4005 MDI_CLIENT_UNLOCK(ct);
4006 rv = ndi_devi_online(cdip, 0);
4007 MDI_CLIENT_LOCK(ct);
4008 if ((rv != NDI_SUCCESS) &&
4009 (MDI_CLIENT_STATE(ct) ==
4010 MDI_CLIENT_STATE_DEGRADED)) {
4011 MDI_CLIENT_SET_OFFLINE(ct);
4012 }
4013 if (rv != NDI_SUCCESS) {
4014 /* Reset the path state */
4015 MDI_PI_LOCK(pip);
4016 MDI_PI(pip)->pi_state =
4017 MDI_PI_OLD_STATE(pip);
4018 MDI_PI_UNLOCK(pip);
4019 }
4020 }
4021 }
4022 switch (rv) {
4023 case NDI_SUCCESS:
4024 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4025 i_mdi_report_path_state(ct, pip);
4026 rv = MDI_SUCCESS;
4027 break;
4028 case NDI_BUSY:
4029 rv = MDI_BUSY;
4030 break;
4031 default:
4032 rv = MDI_FAILURE;
4033 break;
4034 }
4035 } else {
4036 /* Reset the path state */
4037 MDI_PI_LOCK(pip);
4038 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4039 MDI_PI_UNLOCK(pip);
4040 }
4041 MDI_CLIENT_UNLOCK(ct);
4042 return (rv);
4043 }
4044
4045 /*
4046 * mdi_pi_get_node_name():
4047 * Get the name associated with a mdi_pathinfo node.
4048 * Since pathinfo nodes are not directly named, we
4049 * return the node_name of the client.
4050 *
4051 * Return Values:
4052 * char *
4053 */
4054 char *
4055 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4056 {
4057 mdi_client_t *ct;
4058
4059 if (pip == NULL)
4060 return (NULL);
4061 ct = MDI_PI(pip)->pi_client;
4062 if ((ct == NULL) || (ct->ct_dip == NULL))
4063 return (NULL);
4064 return (ddi_node_name(ct->ct_dip));
4065 }
4066
4067 /*
4068 * mdi_pi_get_addr():
4069 * Get the unit address associated with a mdi_pathinfo node
4070 *
4071 * Return Values:
4072 * char *
4073 */
4074 char *
4075 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4076 {
4077 if (pip == NULL)
4078 return (NULL);
4079
4080 return (MDI_PI(pip)->pi_addr);
4081 }
4082
4083 /*
4084 * mdi_pi_get_path_instance():
4085 * Get the 'path_instance' of a mdi_pathinfo node
4086 *
4087 * Return Values:
4088 * path_instance
4089 */
4090 int
4091 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4092 {
4093 if (pip == NULL)
4094 return (0);
4095
4096 return (MDI_PI(pip)->pi_path_instance);
4097 }
4098
4099 /*
4100 * mdi_pi_pathname():
4101 * Return pointer to path to pathinfo node.
4102 */
4103 char *
4104 mdi_pi_pathname(mdi_pathinfo_t *pip)
4105 {
4106 if (pip == NULL)
4107 return (NULL);
4108 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4109 }
4110
4111 /*
4112 * mdi_pi_spathname():
4113 * Return pointer to shortpath to pathinfo node. Used for debug
4114 * messages, so return "" instead of NULL when unknown.
4115 */
4116 char *
4117 mdi_pi_spathname(mdi_pathinfo_t *pip)
4118 {
4119 char *spath = "";
4120
4121 if (pip) {
4122 spath = mdi_pi_spathname_by_instance(
4123 mdi_pi_get_path_instance(pip));
4124 if (spath == NULL)
4125 spath = "";
4126 }
4127 return (spath);
4128 }
4129
4130 char *
4131 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4132 {
4133 char *obp_path = NULL;
4134 if ((pip == NULL) || (path == NULL))
4135 return (NULL);
4136
4137 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4138 (void) strcpy(path, obp_path);
4139 (void) mdi_prop_free(obp_path);
4140 } else {
4141 path = NULL;
4142 }
4143 return (path);
4144 }
4145
4146 int
4147 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4148 {
4149 dev_info_t *pdip;
4150 char *obp_path = NULL;
4151 int rc = MDI_FAILURE;
4152
4153 if (pip == NULL)
4154 return (MDI_FAILURE);
4155
4156 pdip = mdi_pi_get_phci(pip);
4157 if (pdip == NULL)
4158 return (MDI_FAILURE);
4159
4160 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4161
4162 if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4163 (void) ddi_pathname(pdip, obp_path);
4164 }
4165
4166 if (component) {
4167 (void) strncat(obp_path, "/", MAXPATHLEN);
4168 (void) strncat(obp_path, component, MAXPATHLEN);
4169 }
4170 rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4171
4172 if (obp_path)
4173 kmem_free(obp_path, MAXPATHLEN);
4174 return (rc);
4175 }
4176
4177 /*
4178 * mdi_pi_get_client():
4179 * Get the client devinfo associated with a mdi_pathinfo node
4180 *
4181 * Return Values:
4182 * Handle to client device dev_info node
4183 */
4184 dev_info_t *
4185 mdi_pi_get_client(mdi_pathinfo_t *pip)
4186 {
4187 dev_info_t *dip = NULL;
4188 if (pip) {
4189 dip = MDI_PI(pip)->pi_client->ct_dip;
4190 }
4191 return (dip);
4192 }
4193
4194 /*
4195 * mdi_pi_get_phci():
4196 * Get the pHCI devinfo associated with the mdi_pathinfo node
4197 * Return Values:
4198 * Handle to dev_info node
4199 */
4200 dev_info_t *
4201 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4202 {
4203 dev_info_t *dip = NULL;
4204 mdi_phci_t *ph;
4205
4206 if (pip) {
4207 ph = MDI_PI(pip)->pi_phci;
4208 if (ph)
4209 dip = ph->ph_dip;
4210 }
4211 return (dip);
4212 }
4213
4214 /*
4215 * mdi_pi_get_client_private():
4216 * Get the client private information associated with the
4217 * mdi_pathinfo node
4218 */
4219 void *
4220 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4221 {
4222 void *cprivate = NULL;
4223 if (pip) {
4224 cprivate = MDI_PI(pip)->pi_cprivate;
4225 }
4226 return (cprivate);
4227 }
4228
4229 /*
4230 * mdi_pi_set_client_private():
4231 * Set the client private information in the mdi_pathinfo node
4232 */
4233 void
4234 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4235 {
4236 if (pip) {
4237 MDI_PI(pip)->pi_cprivate = priv;
4238 }
4239 }
4240
4241 /*
4242 * mdi_pi_get_phci_private():
4243 * Get the pHCI private information associated with the
4244 * mdi_pathinfo node
4245 */
4246 caddr_t
4247 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4248 {
4249 caddr_t pprivate = NULL;
4250
4251 if (pip) {
4252 pprivate = MDI_PI(pip)->pi_pprivate;
4253 }
4254 return (pprivate);
4255 }
4256
4257 /*
4258 * mdi_pi_set_phci_private():
4259 * Set the pHCI private information in the mdi_pathinfo node
4260 */
4261 void
4262 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4263 {
4264 if (pip) {
4265 MDI_PI(pip)->pi_pprivate = priv;
4266 }
4267 }
4268
4269 /*
4270 * mdi_pi_get_state():
4271 * Get the mdi_pathinfo node state. Transient states are internal
4272 * and not provided to the users
4273 */
4274 mdi_pathinfo_state_t
4275 mdi_pi_get_state(mdi_pathinfo_t *pip)
4276 {
4277 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT;
4278
4279 if (pip) {
4280 if (MDI_PI_IS_TRANSIENT(pip)) {
4281 /*
4282 * mdi_pathinfo is in state transition. Return the
4283 * last good state.
4284 */
4285 state = MDI_PI_OLD_STATE(pip);
4286 } else {
4287 state = MDI_PI_STATE(pip);
4288 }
4289 }
4290 return (state);
4291 }
4292
4293 /*
4294 * mdi_pi_get_flags():
4295 * Get the mdi_pathinfo node flags.
4296 */
4297 uint_t
4298 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4299 {
4300 return (pip ? MDI_PI(pip)->pi_flags : 0);
4301 }
4302
4303 /*
4304 * Note that the following function needs to be the new interface for
4305 * mdi_pi_get_state when mpxio gets integrated to ON.
4306 */
4307 int
4308 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4309 uint32_t *ext_state)
4310 {
4311 *state = MDI_PATHINFO_STATE_INIT;
4312
4313 if (pip) {
4314 if (MDI_PI_IS_TRANSIENT(pip)) {
4315 /*
4316 * mdi_pathinfo is in state transition. Return the
4317 * last good state.
4318 */
4319 *state = MDI_PI_OLD_STATE(pip);
4320 *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4321 } else {
4322 *state = MDI_PI_STATE(pip);
4323 *ext_state = MDI_PI_EXT_STATE(pip);
4324 }
4325 }
4326 return (MDI_SUCCESS);
4327 }
4328
4329 /*
4330 * mdi_pi_get_preferred:
4331 * Get the preferred path flag
4332 */
4333 int
4334 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4335 {
4336 if (pip) {
4337 return (MDI_PI(pip)->pi_preferred);
4338 }
4339 return (0);
4340 }
4341
4342 /*
4343 * mdi_pi_set_preferred:
4344 * Set the preferred path flag
4345 */
4346 void
4347 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4348 {
4349 if (pip) {
4350 MDI_PI(pip)->pi_preferred = preferred;
4351 }
4352 }
4353
4354 /*
4355 * mdi_pi_set_state():
4356 * Set the mdi_pathinfo node state
4357 */
4358 void
4359 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4360 {
4361 uint32_t ext_state;
4362
4363 if (pip) {
4364 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4365 MDI_PI(pip)->pi_state = state;
4366 MDI_PI(pip)->pi_state |= ext_state;
4367
4368 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4369 i_ddi_di_cache_invalidate();
4370 }
4371 }
4372
4373 /*
4374 * Property functions:
4375 */
4376 int
4377 i_map_nvlist_error_to_mdi(int val)
4378 {
4379 int rv;
4380
4381 switch (val) {
4382 case 0:
4383 rv = DDI_PROP_SUCCESS;
4384 break;
4385 case EINVAL:
4386 case ENOTSUP:
4387 rv = DDI_PROP_INVAL_ARG;
4388 break;
4389 case ENOMEM:
4390 rv = DDI_PROP_NO_MEMORY;
4391 break;
4392 default:
4393 rv = DDI_PROP_NOT_FOUND;
4394 break;
4395 }
4396 return (rv);
4397 }
4398
4399 /*
4400 * mdi_pi_get_next_prop():
4401 * Property walk function. The caller should hold mdi_pi_lock()
4402 * and release by calling mdi_pi_unlock() at the end of walk to
4403 * get a consistent value.
4404 */
4405 nvpair_t *
4406 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4407 {
4408 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4409 return (NULL);
4410 }
4411 ASSERT(MDI_PI_LOCKED(pip));
4412 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4413 }
4414
4415 /*
4416 * mdi_prop_remove():
4417 * Remove the named property from the named list.
4418 */
4419 int
4420 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4421 {
4422 if (pip == NULL) {
4423 return (DDI_PROP_NOT_FOUND);
4424 }
4425 ASSERT(!MDI_PI_LOCKED(pip));
4426 MDI_PI_LOCK(pip);
4427 if (MDI_PI(pip)->pi_prop == NULL) {
4428 MDI_PI_UNLOCK(pip);
4429 return (DDI_PROP_NOT_FOUND);
4430 }
4431 if (name) {
4432 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4433 } else {
4434 char nvp_name[MAXNAMELEN];
4435 nvpair_t *nvp;
4436 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4437 while (nvp) {
4438 nvpair_t *next;
4439 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4440 (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4441 nvpair_name(nvp));
4442 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4443 nvp_name);
4444 nvp = next;
4445 }
4446 }
4447 MDI_PI_UNLOCK(pip);
4448 return (DDI_PROP_SUCCESS);
4449 }
4450
4451 /*
4452 * mdi_prop_size():
4453 * Get buffer size needed to pack the property data.
4454 * Caller should hold the mdi_pathinfo_t lock to get a consistent
4455 * buffer size.
4456 */
4457 int
4458 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4459 {
4460 int rv;
4461 size_t bufsize;
4462
4463 *buflenp = 0;
4464 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4465 return (DDI_PROP_NOT_FOUND);
4466 }
4467 ASSERT(MDI_PI_LOCKED(pip));
4468 rv = nvlist_size(MDI_PI(pip)->pi_prop,
4469 &bufsize, NV_ENCODE_NATIVE);
4470 *buflenp = bufsize;
4471 return (i_map_nvlist_error_to_mdi(rv));
4472 }
4473
4474 /*
4475 * mdi_prop_pack():
4476 * pack the property list. The caller should hold the
4477 * mdi_pathinfo_t node to get a consistent data
4478 */
4479 int
4480 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4481 {
4482 int rv;
4483 size_t bufsize;
4484
4485 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4486 return (DDI_PROP_NOT_FOUND);
4487 }
4488
4489 ASSERT(MDI_PI_LOCKED(pip));
4490
4491 bufsize = buflen;
4492 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4493 NV_ENCODE_NATIVE, KM_SLEEP);
4494
4495 return (i_map_nvlist_error_to_mdi(rv));
4496 }
4497
4498 /*
4499 * mdi_prop_update_byte():
4500 * Create/Update a byte property
4501 */
4502 int
4503 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4504 {
4505 int rv;
4506
4507 if (pip == NULL) {
4508 return (DDI_PROP_INVAL_ARG);
4509 }
4510 ASSERT(!MDI_PI_LOCKED(pip));
4511 MDI_PI_LOCK(pip);
4512 if (MDI_PI(pip)->pi_prop == NULL) {
4513 MDI_PI_UNLOCK(pip);
4514 return (DDI_PROP_NOT_FOUND);
4515 }
4516 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4517 MDI_PI_UNLOCK(pip);
4518 return (i_map_nvlist_error_to_mdi(rv));
4519 }
4520
4521 /*
4522 * mdi_prop_update_byte_array():
4523 * Create/Update a byte array property
4524 */
4525 int
4526 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4527 uint_t nelements)
4528 {
4529 int rv;
4530
4531 if (pip == NULL) {
4532 return (DDI_PROP_INVAL_ARG);
4533 }
4534 ASSERT(!MDI_PI_LOCKED(pip));
4535 MDI_PI_LOCK(pip);
4536 if (MDI_PI(pip)->pi_prop == NULL) {
4537 MDI_PI_UNLOCK(pip);
4538 return (DDI_PROP_NOT_FOUND);
4539 }
4540 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4541 MDI_PI_UNLOCK(pip);
4542 return (i_map_nvlist_error_to_mdi(rv));
4543 }
4544
4545 /*
4546 * mdi_prop_update_int():
4547 * Create/Update a 32 bit integer property
4548 */
4549 int
4550 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4551 {
4552 int rv;
4553
4554 if (pip == NULL) {
4555 return (DDI_PROP_INVAL_ARG);
4556 }
4557 ASSERT(!MDI_PI_LOCKED(pip));
4558 MDI_PI_LOCK(pip);
4559 if (MDI_PI(pip)->pi_prop == NULL) {
4560 MDI_PI_UNLOCK(pip);
4561 return (DDI_PROP_NOT_FOUND);
4562 }
4563 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4564 MDI_PI_UNLOCK(pip);
4565 return (i_map_nvlist_error_to_mdi(rv));
4566 }
4567
4568 /*
4569 * mdi_prop_update_int64():
4570 * Create/Update a 64 bit integer property
4571 */
4572 int
4573 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4574 {
4575 int rv;
4576
4577 if (pip == NULL) {
4578 return (DDI_PROP_INVAL_ARG);
4579 }
4580 ASSERT(!MDI_PI_LOCKED(pip));
4581 MDI_PI_LOCK(pip);
4582 if (MDI_PI(pip)->pi_prop == NULL) {
4583 MDI_PI_UNLOCK(pip);
4584 return (DDI_PROP_NOT_FOUND);
4585 }
4586 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4587 MDI_PI_UNLOCK(pip);
4588 return (i_map_nvlist_error_to_mdi(rv));
4589 }
4590
4591 /*
4592 * mdi_prop_update_int_array():
4593 * Create/Update a int array property
4594 */
4595 int
4596 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4597 uint_t nelements)
4598 {
4599 int rv;
4600
4601 if (pip == NULL) {
4602 return (DDI_PROP_INVAL_ARG);
4603 }
4604 ASSERT(!MDI_PI_LOCKED(pip));
4605 MDI_PI_LOCK(pip);
4606 if (MDI_PI(pip)->pi_prop == NULL) {
4607 MDI_PI_UNLOCK(pip);
4608 return (DDI_PROP_NOT_FOUND);
4609 }
4610 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4611 nelements);
4612 MDI_PI_UNLOCK(pip);
4613 return (i_map_nvlist_error_to_mdi(rv));
4614 }
4615
4616 /*
4617 * mdi_prop_update_string():
4618 * Create/Update a string property
4619 */
4620 int
4621 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4622 {
4623 int rv;
4624
4625 if (pip == NULL) {
4626 return (DDI_PROP_INVAL_ARG);
4627 }
4628 ASSERT(!MDI_PI_LOCKED(pip));
4629 MDI_PI_LOCK(pip);
4630 if (MDI_PI(pip)->pi_prop == NULL) {
4631 MDI_PI_UNLOCK(pip);
4632 return (DDI_PROP_NOT_FOUND);
4633 }
4634 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4635 MDI_PI_UNLOCK(pip);
4636 return (i_map_nvlist_error_to_mdi(rv));
4637 }
4638
4639 /*
4640 * mdi_prop_update_string_array():
4641 * Create/Update a string array property
4642 */
4643 int
4644 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4645 uint_t nelements)
4646 {
4647 int rv;
4648
4649 if (pip == NULL) {
4650 return (DDI_PROP_INVAL_ARG);
4651 }
4652 ASSERT(!MDI_PI_LOCKED(pip));
4653 MDI_PI_LOCK(pip);
4654 if (MDI_PI(pip)->pi_prop == NULL) {
4655 MDI_PI_UNLOCK(pip);
4656 return (DDI_PROP_NOT_FOUND);
4657 }
4658 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4659 nelements);
4660 MDI_PI_UNLOCK(pip);
4661 return (i_map_nvlist_error_to_mdi(rv));
4662 }
4663
4664 /*
4665 * mdi_prop_lookup_byte():
4666 * Look for byte property identified by name. The data returned
4667 * is the actual property and valid as long as mdi_pathinfo_t node
4668 * is alive.
4669 */
4670 int
4671 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4672 {
4673 int rv;
4674
4675 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4676 return (DDI_PROP_NOT_FOUND);
4677 }
4678 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4679 return (i_map_nvlist_error_to_mdi(rv));
4680 }
4681
4682
4683 /*
4684 * mdi_prop_lookup_byte_array():
4685 * Look for byte array property identified by name. The data
4686 * returned is the actual property and valid as long as
4687 * mdi_pathinfo_t node is alive.
4688 */
4689 int
4690 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4691 uint_t *nelements)
4692 {
4693 int rv;
4694
4695 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4696 return (DDI_PROP_NOT_FOUND);
4697 }
4698 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4699 nelements);
4700 return (i_map_nvlist_error_to_mdi(rv));
4701 }
4702
4703 /*
4704 * mdi_prop_lookup_int():
4705 * Look for int property identified by name. The data returned
4706 * is the actual property and valid as long as mdi_pathinfo_t
4707 * node is alive.
4708 */
4709 int
4710 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4711 {
4712 int rv;
4713
4714 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4715 return (DDI_PROP_NOT_FOUND);
4716 }
4717 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4718 return (i_map_nvlist_error_to_mdi(rv));
4719 }
4720
4721 /*
4722 * mdi_prop_lookup_int64():
4723 * Look for int64 property identified by name. The data returned
4724 * is the actual property and valid as long as mdi_pathinfo_t node
4725 * is alive.
4726 */
4727 int
4728 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4729 {
4730 int rv;
4731 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4732 return (DDI_PROP_NOT_FOUND);
4733 }
4734 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4735 return (i_map_nvlist_error_to_mdi(rv));
4736 }
4737
4738 /*
4739 * mdi_prop_lookup_int_array():
4740 * Look for int array property identified by name. The data
4741 * returned is the actual property and valid as long as
4742 * mdi_pathinfo_t node is alive.
4743 */
4744 int
4745 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4746 uint_t *nelements)
4747 {
4748 int rv;
4749
4750 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4751 return (DDI_PROP_NOT_FOUND);
4752 }
4753 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4754 (int32_t **)data, nelements);
4755 return (i_map_nvlist_error_to_mdi(rv));
4756 }
4757
4758 /*
4759 * mdi_prop_lookup_string():
4760 * Look for string property identified by name. The data
4761 * returned is the actual property and valid as long as
4762 * mdi_pathinfo_t node is alive.
4763 */
4764 int
4765 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4766 {
4767 int rv;
4768
4769 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4770 return (DDI_PROP_NOT_FOUND);
4771 }
4772 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4773 return (i_map_nvlist_error_to_mdi(rv));
4774 }
4775
4776 /*
4777 * mdi_prop_lookup_string_array():
4778 * Look for string array property identified by name. The data
4779 * returned is the actual property and valid as long as
4780 * mdi_pathinfo_t node is alive.
4781 */
4782 int
4783 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4784 uint_t *nelements)
4785 {
4786 int rv;
4787
4788 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4789 return (DDI_PROP_NOT_FOUND);
4790 }
4791 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4792 nelements);
4793 return (i_map_nvlist_error_to_mdi(rv));
4794 }
4795
4796 /*
4797 * mdi_prop_free():
4798 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4799 * functions return the pointer to actual property data and not a
4800 * copy of it. So the data returned is valid as long as
4801 * mdi_pathinfo_t node is valid.
4802 */
4803 /*ARGSUSED*/
4804 int
4805 mdi_prop_free(void *data)
4806 {
4807 return (DDI_PROP_SUCCESS);
4808 }
4809
4810 /*ARGSUSED*/
4811 static void
4812 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4813 {
4814 char *ct_path;
4815 char *ct_status;
4816 char *status;
4817 dev_info_t *cdip = ct->ct_dip;
4818 char lb_buf[64];
4819 int report_lb_c = 0, report_lb_p = 0;
4820
4821 ASSERT(MDI_CLIENT_LOCKED(ct));
4822 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4823 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4824 return;
4825 }
4826 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4827 ct_status = "optimal";
4828 report_lb_c = 1;
4829 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4830 ct_status = "degraded";
4831 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4832 ct_status = "failed";
4833 } else {
4834 ct_status = "unknown";
4835 }
4836
4837 lb_buf[0] = 0; /* not interested in load balancing config */
4838
4839 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4840 status = "removed";
4841 } else if (MDI_PI_IS_OFFLINE(pip)) {
4842 status = "offline";
4843 } else if (MDI_PI_IS_ONLINE(pip)) {
4844 status = "online";
4845 report_lb_p = 1;
4846 } else if (MDI_PI_IS_STANDBY(pip)) {
4847 status = "standby";
4848 } else if (MDI_PI_IS_FAULT(pip)) {
4849 status = "faulted";
4850 } else {
4851 status = "unknown";
4852 }
4853
4854 if (cdip != NULL) {
4855 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4856
4857 if (report_lb_c && report_lb_p) {
4858 if (ct->ct_lb == LOAD_BALANCE_LBA) {
4859 (void) snprintf(lb_buf, sizeof (lb_buf),
4860 "%s, region-size: %d", mdi_load_balance_lba,
4861 ct->ct_lb_args->region_size);
4862 } else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4863 (void) snprintf(lb_buf, sizeof (lb_buf),
4864 "%s", mdi_load_balance_none);
4865 } else {
4866 (void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4867 mdi_load_balance_rr);
4868 }
4869
4870 dev_err(cdip, CE_CONT, "!multipath status: %s: "
4871 "path %d %s is %s; load balancing: %s\n",
4872 ct_status, mdi_pi_get_path_instance(pip),
4873 mdi_pi_spathname(pip), status, lb_buf);
4874 } else {
4875 dev_err(cdip, CE_CONT,
4876 "!multipath status: %s: path %d %s is %s\n",
4877 ct_status, mdi_pi_get_path_instance(pip),
4878 mdi_pi_spathname(pip), status);
4879 }
4880
4881 kmem_free(ct_path, MAXPATHLEN);
4882 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4883 }
4884 }
4885
4886 #ifdef DEBUG
4887 /*
4888 * i_mdi_log():
4889 * Utility function for error message management
4890 *
4891 * NOTE: Implementation takes care of trailing \n for cmn_err,
4892 * MDI_DEBUG should not terminate fmt strings with \n.
4893 *
4894 * NOTE: If the level is >= 2, and there is no leading !?^
4895 * then a leading ! is implied (but can be overriden via
4896 * mdi_debug_consoleonly). If you are using kmdb on the console,
4897 * consider setting mdi_debug_consoleonly to 1 as an aid.
4898 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char name[MAXNAMELEN];
	char buf[512];
	char *bp;
	va_list ap;
	int log_only = 0;
	int boot_only = 0;
	int console_only = 0;

	/* Prefix messages with "driver<instance>: " when a dip is given. */
	if (dip) {
		(void) snprintf(name, sizeof(name), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	/* Format the caller's message into a local buffer first. */
	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * Honor a cmn_err-style routing prefix supplied by the caller
	 * (! = log only, ? = boot only, ^ = console only) and strip it,
	 * since we re-add the prefix ourselves below.
	 */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		if (level >= 2)
			log_only = 1;		/* ! implied */
		bp = buf;
		break;
	}
	/* Global tunables override the caller's routing choice. */
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}
	if (mdi_debug_consoleonly) {
		log_only = 0;
		boot_only = 0;
		console_only = 1;
		level = CE_NOTE;
		goto console;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
		/* cmn_err(CE_CONT) needs an explicit trailing newline */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
		}
		break;

	case CE_WARN:
	case CE_PANIC:
	console:
		/* WARN/PANIC: cmn_err supplies the newline itself */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
		}
		break;
	default:
		/* NOTE(review): this path omits func — looks intentional? */
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
4988 #endif /* DEBUG */
4989
/*
 * i_mdi_client_online():
 *	Client online notification.  Marks the client state online,
 *	(re)binds the mdi_client_t to its dev_info node, powers up all
 *	pHCIs if no power count is currently held, and takes a
 *	power-management hold on the client.
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t *ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	/* power up all pHCIs when the client holds no power count yet */
	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}
5016
5017 void
5018 i_mdi_phci_online(dev_info_t *ph_dip)
5019 {
5020 mdi_phci_t *ph;
5021
5022 /* pHCI online notification. Mark state accordingly */
5023 ph = i_devi_get_phci(ph_dip);
5024 ASSERT(ph != NULL);
5025 MDI_PHCI_LOCK(ph);
5026 MDI_PHCI_SET_ONLINE(ph);
5027 MDI_PHCI_UNLOCK(ph);
5028 }
5029
5030 /*
5031 * mdi_devi_online():
5032 * Online notification from NDI framework on pHCI/client
5033 * device online.
5034 * Return Values:
5035 * NDI_SUCCESS
5036 * MDI_FAILURE
5037 */
5038 /*ARGSUSED*/
5039 int
5040 mdi_devi_online(dev_info_t *dip, uint_t flags)
5041 {
5042 if (MDI_PHCI(dip)) {
5043 i_mdi_phci_online(dip);
5044 }
5045
5046 if (MDI_CLIENT(dip)) {
5047 i_mdi_client_online(dip);
5048 }
5049 return (NDI_SUCCESS);
5050 }
5051
5052 /*
5053 * mdi_devi_offline():
5054 * Offline notification from NDI framework on pHCI/Client device
5055 * offline.
5056 *
5057 * Return Values:
5058 * NDI_SUCCESS
5059 * NDI_FAILURE
5060 */
5061 /*ARGSUSED*/
5062 int
5063 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5064 {
5065 int rv = NDI_SUCCESS;
5066
5067 if (MDI_CLIENT(dip)) {
5068 rv = i_mdi_client_offline(dip, flags);
5069 if (rv != NDI_SUCCESS)
5070 return (rv);
5071 }
5072
5073 if (MDI_PHCI(dip)) {
5074 rv = i_mdi_phci_offline(dip, flags);
5075
5076 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5077 /* set client back online */
5078 i_mdi_client_online(dip);
5079 }
5080 }
5081
5082 return (rv);
5083 }
5084
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int rv = NDI_SUCCESS;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip;
	mdi_pathinfo_t *next;
	mdi_pathinfo_t *failed_pip = NULL;
	dev_info_t *cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 *
	 * Returns NDI_SUCCESS when the pHCI (and all its paths) were
	 * offlined, NDI_BUSY when any path/client is in a state that
	 * prevents offlining.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Pass 1: walk all paths; fail busy clients, and offline any
	 * client for which this pHCI holds the last usable path.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Last path: drop both locks around the
			 * ndi_devi_offline() call, which may block
			 * and re-enter the framework.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Roll back: re-online the client devices offlined by
		 * pass 1, up to (but excluding) the path that failed.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	/*
	 * Pass 2: actually offline every path; if any path refuses,
	 * put the pHCI back online and report busy.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
5283
/*
 * mdi_phci_mark_retiring():
 *	Walk the paths of a retiring pHCI and, for every client for
 *	which this pHCI provides the last usable path, mark the client
 *	dip as retiring via e_ddi_mark_retiring().  No-op for non-pHCI
 *	or already-offline pHCIs.
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip;
	mdi_pathinfo_t *next;
	dev_info_t *cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path. Mark client dip as retiring */
			/*
			 * Drop both locks around the framework call,
			 * which may block; 'next' was captured above so
			 * the walk can continue afterwards.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}
5338
/*
 * mdi_phci_retire_notify():
 *	Retire-notify pass over a pHCI's paths.  For every client for
 *	which this pHCI provides the last usable path, run
 *	e_ddi_retire_notify() so retire constraints can be gathered.
 *	*constraint is cleared (set to 0) if the pHCI or any client is
 *	in a transient/failover state, meaning constraints cannot be
 *	evaluated and the retire must not proceed.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip;
	mdi_pathinfo_t *next;
	dev_info_t *cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Drop both locks around the framework call;
			 * 'next' was captured above so the walk resumes
			 * safely once the pHCI lock is retaken.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_retire_notify(cdip, constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}
5417
5418 /*
5419 * offline the path(s) hanging off the pHCI. If the
5420 * last path to any client, check that constraints
5421 * have been applied.
5422 *
5423 * If constraint is 0, we aren't going to retire the
5424 * pHCI. However we still need to go through the paths
5425 * calling e_ddi_retire_finalize() to clear their
5426 * contract barriers.
5427 */
5428 void
5429 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5430 {
5431 mdi_phci_t *ph;
5432 mdi_client_t *ct;
5433 mdi_pathinfo_t *pip;
5434 mdi_pathinfo_t *next;
5435 dev_info_t *cdip;
5436 int unstable = 0;
5437 int tmp_constraint;
5438
5439 if (!MDI_PHCI(dip))
5440 return;
5441
5442 ph = i_devi_get_phci(dip);
5443 if (ph == NULL) {
5444 /* no last path and no pips */
5445 return;
5446 }
5447
5448 MDI_PHCI_LOCK(ph);
5449
5450 if (MDI_PHCI_IS_OFFLINE(ph)) {
5451 MDI_PHCI_UNLOCK(ph);
5452 /* no last path and no pips */
5453 return;
5454 }
5455
5456 /*
5457 * Check to see if the pHCI can be offlined
5458 */
5459 if (ph->ph_unstable) {
5460 unstable = 1;
5461 }
5462
5463 pip = ph->ph_path_head;
5464 while (pip != NULL) {
5465 MDI_PI_LOCK(pip);
5466 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5467
5468 /*
5469 * if failover in progress fail the pHCI from offlining
5470 */
5471 ct = MDI_PI(pip)->pi_client;
5472 i_mdi_client_lock(ct, pip);
5473 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5474 (ct->ct_unstable)) {
5475 unstable = 1;
5476 }
5477 MDI_PI_UNLOCK(pip);
5478
5479 /*
5480 * Check to see of we are removing the last path of this
5481 * client device...
5482 */
5483 cdip = ct->ct_dip;
5484 if (!phci_only && cdip &&
5485 (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5486 (i_mdi_client_compute_state(ct, ph) ==
5487 MDI_CLIENT_STATE_FAILED)) {
5488 i_mdi_client_unlock(ct);
5489 MDI_PHCI_UNLOCK(ph);
5490 /*
5491 * This is the last path to this client.
5492 *
5493 * Constraint will only be set to 1 if this client can
5494 * be retired (as already determined by
5495 * mdi_phci_retire_notify). However we don't actually
5496 * need to retire the client (we just retire the last
5497 * path - MPXIO will then fail all I/Os to the client).
5498 * But we still need to call e_ddi_retire_finalize so
5499 * the contract barriers can be cleared. Therefore we
5500 * temporarily set constraint = 0 so that the client
5501 * dip is not retired.
5502 */
5503 tmp_constraint = 0;
5504 (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5505 MDI_PHCI_LOCK(ph);
5506 pip = next;
5507 } else {
5508 i_mdi_client_unlock(ct);
5509 pip = next;
5510 }
5511 }
5512
5513 if (!phci_only && *((int *)constraint) == 0) {
5514 MDI_PHCI_UNLOCK(ph);
5515 return;
5516 }
5517
5518 /*
5519 * Cannot offline pip(s)
5520 */
5521 if (unstable) {
5522 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5523 "pHCI in transient state, cannot retire",
5524 ddi_driver_name(dip), ddi_get_instance(dip));
5525 MDI_PHCI_UNLOCK(ph);
5526 return;
5527 }
5528
5529 /*
5530 * Mark the pHCI as offline
5531 */
5532 MDI_PHCI_SET_OFFLINE(ph);
5533
5534 /*
5535 * Mark the child mdi_pathinfo nodes as transient
5536 */
5537 pip = ph->ph_path_head;
5538 while (pip != NULL) {
5539 MDI_PI_LOCK(pip);
5540 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5541 MDI_PI_SET_OFFLINING(pip);
5542 MDI_PI_UNLOCK(pip);
5543 pip = next;
5544 }
5545 MDI_PHCI_UNLOCK(ph);
5546 /*
5547 * Give a chance for any pending commands to execute
5548 */
5549 delay_random(mdi_delay);
5550 MDI_PHCI_LOCK(ph);
5551 pip = ph->ph_path_head;
5552 while (pip != NULL) {
5553 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5554 (void) i_mdi_pi_offline(pip, 0);
5555 MDI_PI_LOCK(pip);
5556 ct = MDI_PI(pip)->pi_client;
5557 if (!MDI_PI_IS_OFFLINE(pip)) {
5558 cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5559 "path %d %s busy, cannot offline",
5560 mdi_pi_get_path_instance(pip),
5561 mdi_pi_spathname(pip));
5562 MDI_PI_UNLOCK(pip);
5563 MDI_PHCI_SET_ONLINE(ph);
5564 MDI_PHCI_UNLOCK(ph);
5565 return;
5566 }
5567 MDI_PI_UNLOCK(pip);
5568 pip = next;
5569 }
5570 MDI_PHCI_UNLOCK(ph);
5571
5572 return;
5573 }
5574
5575 void
5576 mdi_phci_unretire(dev_info_t *dip)
5577 {
5578 mdi_phci_t *ph;
5579 mdi_pathinfo_t *pip;
5580 mdi_pathinfo_t *next;
5581
5582 ASSERT(MDI_PHCI(dip));
5583
5584 /*
5585 * Online the phci
5586 */
5587 i_mdi_phci_online(dip);
5588
5589 ph = i_devi_get_phci(dip);
5590 MDI_PHCI_LOCK(ph);
5591 pip = ph->ph_path_head;
5592 while (pip != NULL) {
5593 MDI_PI_LOCK(pip);
5594 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5595 MDI_PI_UNLOCK(pip);
5596 (void) i_mdi_pi_online(pip, 0);
5597 pip = next;
5598 }
5599 MDI_PHCI_UNLOCK(ph);
5600 }
5601
/*
 * i_mdi_client_offline():
 *	Client component offline handler.  Refuses (NDI_BUSY) while any
 *	path is transient or a failover is in progress; otherwise marks
 *	the client offline and, on NDI_DEVI_REMOVE, unbinds it from its
 *	dev_info node.
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int rv = NDI_SUCCESS;
	mdi_client_t *ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!One or more paths to "
			    "this device are in transient state. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!Client device is Busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}
5656
5657 /*
5658 * mdi_pre_attach():
5659 * Pre attach() notification handler
5660 */
5661 /*ARGSUSED*/
5662 int
5663 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5664 {
5665 /* don't support old DDI_PM_RESUME */
5666 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5667 (cmd == DDI_PM_RESUME))
5668 return (DDI_FAILURE);
5669
5670 return (DDI_SUCCESS);
5671 }
5672
5673 /*
5674 * mdi_post_attach():
5675 * Post attach() notification handler
5676 */
5677 /*ARGSUSED*/
5678 void
5679 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5680 {
5681 mdi_phci_t *ph;
5682 mdi_client_t *ct;
5683 mdi_vhci_t *vh;
5684
5685 if (MDI_PHCI(dip)) {
5686 ph = i_devi_get_phci(dip);
5687 ASSERT(ph != NULL);
5688
5689 MDI_PHCI_LOCK(ph);
5690 switch (cmd) {
5691 case DDI_ATTACH:
5692 MDI_DEBUG(2, (MDI_NOTE, dip,
5693 "phci post_attach called %p", (void *)ph));
5694 if (error == DDI_SUCCESS) {
5695 MDI_PHCI_SET_ATTACH(ph);
5696 } else {
5697 MDI_DEBUG(1, (MDI_NOTE, dip,
5698 "!pHCI post_attach failed: error %d",
5699 error));
5700 MDI_PHCI_SET_DETACH(ph);
5701 }
5702 break;
5703
5704 case DDI_RESUME:
5705 MDI_DEBUG(2, (MDI_NOTE, dip,
5706 "pHCI post_resume: called %p", (void *)ph));
5707 if (error == DDI_SUCCESS) {
5708 MDI_PHCI_SET_RESUME(ph);
5709 } else {
5710 MDI_DEBUG(1, (MDI_NOTE, dip,
5711 "!pHCI post_resume failed: error %d",
5712 error));
5713 MDI_PHCI_SET_SUSPEND(ph);
5714 }
5715 break;
5716 }
5717 MDI_PHCI_UNLOCK(ph);
5718 }
5719
5720 if (MDI_CLIENT(dip)) {
5721 ct = i_devi_get_client(dip);
5722 ASSERT(ct != NULL);
5723
5724 MDI_CLIENT_LOCK(ct);
5725 switch (cmd) {
5726 case DDI_ATTACH:
5727 MDI_DEBUG(2, (MDI_NOTE, dip,
5728 "client post_attach called %p", (void *)ct));
5729 if (error != DDI_SUCCESS) {
5730 MDI_DEBUG(1, (MDI_NOTE, dip,
5731 "!client post_attach failed: error %d",
5732 error));
5733 MDI_CLIENT_SET_DETACH(ct);
5734 MDI_DEBUG(4, (MDI_WARN, dip,
5735 "i_mdi_pm_reset_client"));
5736 i_mdi_pm_reset_client(ct);
5737 break;
5738 }
5739
5740 /*
5741 * Client device has successfully attached, inform
5742 * the vhci.
5743 */
5744 vh = ct->ct_vhci;
5745 if (vh->vh_ops->vo_client_attached)
5746 (*vh->vh_ops->vo_client_attached)(dip);
5747
5748 MDI_CLIENT_SET_ATTACH(ct);
5749 break;
5750
5751 case DDI_RESUME:
5752 MDI_DEBUG(2, (MDI_NOTE, dip,
5753 "client post_attach: called %p", (void *)ct));
5754 if (error == DDI_SUCCESS) {
5755 MDI_CLIENT_SET_RESUME(ct);
5756 } else {
5757 MDI_DEBUG(1, (MDI_NOTE, dip,
5758 "!client post_resume failed: error %d",
5759 error));
5760 MDI_CLIENT_SET_SUSPEND(ct);
5761 }
5762 break;
5763 }
5764 MDI_CLIENT_UNLOCK(ct);
5765 }
5766 }
5767
5768 /*
5769 * mdi_pre_detach():
5770 * Pre detach notification handler
5771 */
5772 /*ARGSUSED*/
5773 int
5774 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5775 {
5776 int rv = DDI_SUCCESS;
5777
5778 if (MDI_CLIENT(dip)) {
5779 (void) i_mdi_client_pre_detach(dip, cmd);
5780 }
5781
5782 if (MDI_PHCI(dip)) {
5783 rv = i_mdi_phci_pre_detach(dip, cmd);
5784 }
5785
5786 return (rv);
5787 }
5788
/*
 * i_mdi_phci_pre_detach():
 *	pHCI-side pre-detach handler.  DDI_DETACH is refused while any
 *	path is still attached.  DDI_SUSPEND first suspends all client
 *	devices reachable through this pHCI (resuming any already
 *	suspended ones if a suspend fails part-way).  All other
 *	commands fail.
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip;
	mdi_pathinfo_t *failed_pip = NULL;
	mdi_pathinfo_t *next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_detach: called %p", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (MDI_WARN, dip,
			    "pHCI pre_detach: paths are still attached %p",
			    (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_suspend: called %p", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			/* skip clients already detached or suspended */
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (MDI_WARN, dip,
					    "!suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.  Walk paths up to (excluding) the one
			 * whose client failed to suspend.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
5908
5909 /*ARGSUSED*/
5910 static int
5911 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5912 {
5913 int rv = DDI_SUCCESS;
5914 mdi_client_t *ct;
5915
5916 ct = i_devi_get_client(dip);
5917 if (ct == NULL) {
5918 return (rv);
5919 }
5920
5921 MDI_CLIENT_LOCK(ct);
5922 switch (cmd) {
5923 case DDI_DETACH:
5924 MDI_DEBUG(2, (MDI_NOTE, dip,
5925 "client pre_detach: called %p",
5926 (void *)ct));
5927 MDI_CLIENT_SET_DETACH(ct);
5928 break;
5929
5930 case DDI_SUSPEND:
5931 MDI_DEBUG(2, (MDI_NOTE, dip,
5932 "client pre_suspend: called %p",
5933 (void *)ct));
5934 MDI_CLIENT_SET_SUSPEND(ct);
5935 break;
5936
5937 default:
5938 rv = DDI_FAILURE;
5939 break;
5940 }
5941 MDI_CLIENT_UNLOCK(ct);
5942 return (rv);
5943 }
5944
5945 /*
5946 * mdi_post_detach():
5947 * Post detach notification handler
5948 */
5949 /*ARGSUSED*/
5950 void
5951 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5952 {
5953 /*
5954 * Detach/Suspend of mpxio component failed. Update our state
5955 * too
5956 */
5957 if (MDI_PHCI(dip))
5958 i_mdi_phci_post_detach(dip, cmd, error);
5959
5960 if (MDI_CLIENT(dip))
5961 i_mdi_client_post_detach(dip, cmd, error);
5962 }
5963
5964 /*ARGSUSED*/
5965 static void
5966 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5967 {
5968 mdi_phci_t *ph;
5969
5970 /*
5971 * Detach/Suspend of phci component failed. Update our state
5972 * too
5973 */
5974 ph = i_devi_get_phci(dip);
5975 if (ph == NULL) {
5976 return;
5977 }
5978
5979 MDI_PHCI_LOCK(ph);
5980 /*
5981 * Detach of pHCI failed. Restore back converse
5982 * state
5983 */
5984 switch (cmd) {
5985 case DDI_DETACH:
5986 MDI_DEBUG(2, (MDI_NOTE, dip,
5987 "pHCI post_detach: called %p",
5988 (void *)ph));
5989 if (error != DDI_SUCCESS)
5990 MDI_PHCI_SET_ATTACH(ph);
5991 break;
5992
5993 case DDI_SUSPEND:
5994 MDI_DEBUG(2, (MDI_NOTE, dip,
5995 "pHCI post_suspend: called %p",
5996 (void *)ph));
5997 if (error != DDI_SUCCESS)
5998 MDI_PHCI_SET_RESUME(ph);
5999 break;
6000 }
6001 MDI_PHCI_UNLOCK(ph);
6002 }
6003
/*
 * Post-detach/post-suspend handler for an MDI client node.
 *
 * On DDI_DETACH: undo the power-management holds taken earlier in the
 * detach cycle, then, if the detach itself failed, restore the
 * client's ATTACH state.  On DDI_SUSPEND: if the suspend failed,
 * restore the RESUME state.
 */
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client post_detach: called %p", (void *)ct));
		if (DEVI_IS_ATTACHING(dip)) {
			/*
			 * Node is being re-attached (detach raced with an
			 * attach): just drop the per-path PM holds.
			 */
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			/* Plain detach: zero the PM accounting entirely. */
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "called %p", (void *)ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}
6045
6046 int
6047 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6048 {
6049 return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6050 }
6051
6052 /*
6053 * create and install per-path (client - pHCI) statistics
6054 * I/O stats supported: nread, nwritten, reads, and writes
6055 * Error stats - hard errors, soft errors, & transport errors
6056 */
6057 int
6058 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6059 {
6060 kstat_t *kiosp, *kerrsp;
6061 struct pi_errs *nsp;
6062 struct mdi_pi_kstats *mdi_statp;
6063 char *errksname;
6064 size_t len;
6065
6066 /*
6067 * If the kstat name was already created nothing to do.
6068 */
6069 if ((kiosp = kstat_hold_byname("mdi", 0, ksname,
6070 ALL_ZONES)) != NULL) {
6071 kstat_rele(kiosp);
6072 return (MDI_SUCCESS);
6073 }
6074
6075 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6076 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6077 return (MDI_FAILURE);
6078 }
6079
6080 len = strlen(ksname) + strlen(",err") + 1;
6081 errksname = kmem_alloc(len, KM_SLEEP);
6082 (void) snprintf(errksname, len, "%s,err", ksname);
6083
6084 kerrsp = kstat_create("mdi", 0, errksname, "iopath_errors",
6085 KSTAT_TYPE_NAMED,
6086 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6087 if (kerrsp == NULL) {
6088 kstat_delete(kiosp);
6089 kmem_free(errksname, len);
6090 return (MDI_FAILURE);
6091 }
6092
6093 nsp = (struct pi_errs *)kerrsp->ks_data;
6094 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6095 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6096 kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6097 KSTAT_DATA_UINT32);
6098 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6099 KSTAT_DATA_UINT32);
6100 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6101 KSTAT_DATA_UINT32);
6102 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6103 KSTAT_DATA_UINT32);
6104 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6105 KSTAT_DATA_UINT32);
6106 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6107 KSTAT_DATA_UINT32);
6108 kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6109 KSTAT_DATA_UINT32);
6110 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6111
6112 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6113 mdi_statp->pi_kstat_ref = 1;
6114 mdi_statp->pi_kstat_iostats = kiosp;
6115 mdi_statp->pi_kstat_errstats = kerrsp;
6116 kstat_install(kiosp);
6117 kstat_install(kerrsp);
6118 MDI_PI(pip)->pi_kstats = mdi_statp;
6119 kmem_free(errksname, len);
6120 return (MDI_SUCCESS);
6121 }
6122
6123 /*
6124 * destroy per-path properties
6125 */
6126 static void
6127 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6128 {
6129
6130 struct mdi_pi_kstats *mdi_statp;
6131
6132 if (MDI_PI(pip)->pi_kstats == NULL)
6133 return;
6134 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6135 return;
6136
6137 MDI_PI(pip)->pi_kstats = NULL;
6138
6139 /*
6140 * the kstat may be shared between multiple pathinfo nodes
6141 * decrement this pathinfo's usage, removing the kstats
6142 * themselves when the last pathinfo reference is removed.
6143 */
6144 ASSERT(mdi_statp->pi_kstat_ref > 0);
6145 if (--mdi_statp->pi_kstat_ref != 0)
6146 return;
6147
6148 kstat_delete(mdi_statp->pi_kstat_iostats);
6149 kstat_delete(mdi_statp->pi_kstat_errstats);
6150 kmem_free(mdi_statp, sizeof (*mdi_statp));
6151 }
6152
6153 /*
6154 * update I/O paths KSTATS
6155 */
6156 void
6157 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6158 {
6159 kstat_t *iostatp;
6160 size_t xfer_cnt;
6161
6162 ASSERT(pip != NULL);
6163
6164 /*
6165 * I/O can be driven across a path prior to having path
6166 * statistics available, i.e. probe(9e).
6167 */
6168 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6169 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6170 xfer_cnt = bp->b_bcount - bp->b_resid;
6171 if (bp->b_flags & B_READ) {
6172 KSTAT_IO_PTR(iostatp)->reads++;
6173 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6174 } else {
6175 KSTAT_IO_PTR(iostatp)->writes++;
6176 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6177 }
6178 }
6179 }
6180
6181 /*
6182 * Enable the path(specific client/target/initiator)
6183 * Enabling a path means that MPxIO may select the enabled path for routing
6184 * future I/O requests, subject to other path state constraints.
6185 */
6186 int
6187 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6188 {
6189 mdi_phci_t *ph;
6190
6191 ph = MDI_PI(pip)->pi_phci;
6192 if (ph == NULL) {
6193 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6194 "!failed: path %s %p: NULL ph",
6195 mdi_pi_spathname(pip), (void *)pip));
6196 return (MDI_FAILURE);
6197 }
6198
6199 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6200 MDI_ENABLE_OP);
6201 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6202 "!returning success pip = %p. ph = %p",
6203 (void *)pip, (void *)ph));
6204 return (MDI_SUCCESS);
6205
6206 }
6207
6208 /*
6209 * Disable the path (specific client/target/initiator)
6210 * Disabling a path means that MPxIO will not select the disabled path for
6211 * routing any new I/O requests.
6212 */
6213 int
6214 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6215 {
6216 mdi_phci_t *ph;
6217
6218 ph = MDI_PI(pip)->pi_phci;
6219 if (ph == NULL) {
6220 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6221 "!failed: path %s %p: NULL ph",
6222 mdi_pi_spathname(pip), (void *)pip));
6223 return (MDI_FAILURE);
6224 }
6225
6226 (void) i_mdi_enable_disable_path(pip,
6227 ph->ph_vhci, flags, MDI_DISABLE_OP);
6228 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6229 "!returning success pip = %p. ph = %p",
6230 (void *)pip, (void *)ph));
6231 return (MDI_SUCCESS);
6232 }
6233
6234 /*
6235 * disable the path to a particular pHCI (pHCI specified in the phci_path
6236 * argument) for a particular client (specified in the client_path argument).
6237 * Disabling a path means that MPxIO will not select the disabled path for
6238 * routing any new I/O requests.
6239 * NOTE: this will be removed once the NWS files are changed to use the new
6240 * mdi_{enable,disable}_path interfaces
6241 */
6242 int
6243 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6244 {
6245 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6246 }
6247
6248 /*
6249 * Enable the path to a particular pHCI (pHCI specified in the phci_path
6250 * argument) for a particular client (specified in the client_path argument).
6251 * Enabling a path means that MPxIO may select the enabled path for routing
6252 * future I/O requests, subject to other path state constraints.
6253 * NOTE: this will be removed once the NWS files are changed to use the new
6254 * mdi_{enable,disable}_path interfaces
6255 */
6256
6257 int
6258 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6259 {
6260 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6261 }
6262
6263 /*
6264 * Common routine for doing enable/disable.
6265 */
6266 static mdi_pathinfo_t *
6267 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6268 int op)
6269 {
6270 int sync_flag = 0;
6271 int rv;
6272 mdi_pathinfo_t *next;
6273 int (*f)() = NULL;
6274
6275 /*
6276 * Check to make sure the path is not already in the
6277 * requested state. If it is just return the next path
6278 * as we have nothing to do here.
6279 */
6280 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6281 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6282 MDI_PI_LOCK(pip);
6283 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6284 MDI_PI_UNLOCK(pip);
6285 return (next);
6286 }
6287
6288 f = vh->vh_ops->vo_pi_state_change;
6289
6290 sync_flag = (flags << 8) & 0xf00;
6291
6292 /*
6293 * Do a callback into the mdi consumer to let it
6294 * know that path is about to get enabled/disabled.
6295 */
6296 if (f != NULL) {
6297 rv = (*f)(vh->vh_dip, pip, 0,
6298 MDI_PI_EXT_STATE(pip),
6299 MDI_EXT_STATE_CHANGE | sync_flag |
6300 op | MDI_BEFORE_STATE_CHANGE);
6301 if (rv != MDI_SUCCESS) {
6302 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6303 "vo_pi_state_change: failed rv = %x", rv));
6304 }
6305 }
6306 MDI_PI_LOCK(pip);
6307 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6308
6309 switch (flags) {
6310 case USER_DISABLE:
6311 if (op == MDI_DISABLE_OP) {
6312 MDI_PI_SET_USER_DISABLE(pip);
6313 } else {
6314 MDI_PI_SET_USER_ENABLE(pip);
6315 }
6316 break;
6317 case DRIVER_DISABLE:
6318 if (op == MDI_DISABLE_OP) {
6319 MDI_PI_SET_DRV_DISABLE(pip);
6320 } else {
6321 MDI_PI_SET_DRV_ENABLE(pip);
6322 }
6323 break;
6324 case DRIVER_DISABLE_TRANSIENT:
6325 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6326 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6327 } else {
6328 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6329 }
6330 break;
6331 }
6332 MDI_PI_UNLOCK(pip);
6333 /*
6334 * Do a callback into the mdi consumer to let it
6335 * know that path is now enabled/disabled.
6336 */
6337 if (f != NULL) {
6338 rv = (*f)(vh->vh_dip, pip, 0,
6339 MDI_PI_EXT_STATE(pip),
6340 MDI_EXT_STATE_CHANGE | sync_flag |
6341 op | MDI_AFTER_STATE_CHANGE);
6342 if (rv != MDI_SUCCESS) {
6343 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6344 "vo_pi_state_change failed: rv = %x", rv));
6345 }
6346 }
6347 return (next);
6348 }
6349
6350 /*
6351 * Common routine for doing enable/disable.
6352 * NOTE: this will be removed once the NWS files are changed to use the new
6353 * mdi_{enable,disable}_path has been putback
6354 */
6355 int
6356 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6357 {
6358
6359 mdi_phci_t *ph;
6360 mdi_vhci_t *vh = NULL;
6361 mdi_client_t *ct;
6362 mdi_pathinfo_t *next, *pip;
6363 int found_it;
6364
6365 ph = i_devi_get_phci(pdip);
6366 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6367 "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6368 (void *)cdip));
6369 if (ph == NULL) {
6370 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6371 "!failed: operation %d: NULL ph", op));
6372 return (MDI_FAILURE);
6373 }
6374
6375 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6376 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6377 "!failed: invalid operation %d", op));
6378 return (MDI_FAILURE);
6379 }
6380
6381 vh = ph->ph_vhci;
6382
6383 if (cdip == NULL) {
6384 /*
6385 * Need to mark the Phci as enabled/disabled.
6386 */
6387 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6388 "op %d for the phci", op));
6389 MDI_PHCI_LOCK(ph);
6390 switch (flags) {
6391 case USER_DISABLE:
6392 if (op == MDI_DISABLE_OP) {
6393 MDI_PHCI_SET_USER_DISABLE(ph);
6394 } else {
6395 MDI_PHCI_SET_USER_ENABLE(ph);
6396 }
6397 break;
6398 case DRIVER_DISABLE:
6399 if (op == MDI_DISABLE_OP) {
6400 MDI_PHCI_SET_DRV_DISABLE(ph);
6401 } else {
6402 MDI_PHCI_SET_DRV_ENABLE(ph);
6403 }
6404 break;
6405 case DRIVER_DISABLE_TRANSIENT:
6406 if (op == MDI_DISABLE_OP) {
6407 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6408 } else {
6409 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6410 }
6411 break;
6412 default:
6413 MDI_PHCI_UNLOCK(ph);
6414 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6415 "!invalid flag argument= %d", flags));
6416 }
6417
6418 /*
6419 * Phci has been disabled. Now try to enable/disable
6420 * path info's to each client.
6421 */
6422 pip = ph->ph_path_head;
6423 while (pip != NULL) {
6424 pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6425 }
6426 MDI_PHCI_UNLOCK(ph);
6427 } else {
6428
6429 /*
6430 * Disable a specific client.
6431 */
6432 ct = i_devi_get_client(cdip);
6433 if (ct == NULL) {
6434 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6435 "!failed: operation = %d: NULL ct", op));
6436 return (MDI_FAILURE);
6437 }
6438
6439 MDI_CLIENT_LOCK(ct);
6440 pip = ct->ct_path_head;
6441 found_it = 0;
6442 while (pip != NULL) {
6443 MDI_PI_LOCK(pip);
6444 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6445 if (MDI_PI(pip)->pi_phci == ph) {
6446 MDI_PI_UNLOCK(pip);
6447 found_it = 1;
6448 break;
6449 }
6450 MDI_PI_UNLOCK(pip);
6451 pip = next;
6452 }
6453
6454
6455 MDI_CLIENT_UNLOCK(ct);
6456 if (found_it == 0) {
6457 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6458 "!failed. Could not find corresponding pip\n"));
6459 return (MDI_FAILURE);
6460 }
6461
6462 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6463 }
6464
6465 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6466 "!op %d returning success pdip = %p cdip = %p",
6467 op, (void *)pdip, (void *)cdip));
6468 return (MDI_SUCCESS);
6469 }
6470
/*
 * Ensure phci powered up
 *
 * Takes a power-management hold on the pHCI that serves 'pip' via
 * pm_hold_power(), marking pi_pm_held so the hold is taken at most
 * once per path.  Caller must hold the pathinfo lock; the lock is
 * temporarily dropped around the pm_hold_power() call.
 */
static void
i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* Already held: nothing to do. */
	if (MDI_PI(pip)->pi_pm_held) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
	if (ph_dip == NULL) {
		return;
	}

	/*
	 * pm_hold_power() may block, so the pathinfo lock is dropped
	 * across the call and reacquired afterwards.
	 */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_hold_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
	if (DEVI(ph_dip)->devi_pm_info)
		MDI_PI(pip)->pi_pm_held = 1;
}
6505
/*
 * Allow phci powered down
 *
 * Releases the power-management hold taken by i_mdi_pm_hold_pip() and
 * clears pi_pm_held.  Caller must hold the pathinfo lock; the lock is
 * temporarily dropped around the pm_rele_power() call.
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* No hold outstanding: nothing to release. */
	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));

	/* Drop the pathinfo lock across the blocking PM call. */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	MDI_PI(pip)->pi_pm_held = 0;
}
6537
6538 static void
6539 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6540 {
6541 ASSERT(MDI_CLIENT_LOCKED(ct));
6542
6543 ct->ct_power_cnt += incr;
6544 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6545 "%p ct_power_cnt = %d incr = %d",
6546 (void *)ct, ct->ct_power_cnt, incr));
6547 ASSERT(ct->ct_power_cnt >= 0);
6548 }
6549
/*
 * Release the per-path PM holds on every pHCI path of this client.
 * Caller must hold the client lock, which is retained for the whole
 * walk.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		/*
		 * NOTE(review): pi_client_link is read after
		 * mdi_rele_path(); presumably safe because the client
		 * lock held throughout keeps the path on the list —
		 * confirm before restructuring this loop.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}
6566
6567 static void
6568 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6569 {
6570 ASSERT(MDI_CLIENT_LOCKED(ct));
6571
6572 if (i_ddi_devi_attached(ct->ct_dip)) {
6573 ct->ct_power_cnt -= decr;
6574 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6575 "%p ct_power_cnt = %d decr = %d",
6576 (void *)ct, ct->ct_power_cnt, decr));
6577 }
6578
6579 ASSERT(ct->ct_power_cnt >= 0);
6580 if (ct->ct_power_cnt == 0) {
6581 i_mdi_rele_all_phci(ct);
6582 return;
6583 }
6584 }
6585
6586 static void
6587 i_mdi_pm_reset_client(mdi_client_t *ct)
6588 {
6589 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6590 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6591 ASSERT(MDI_CLIENT_LOCKED(ct));
6592 ct->ct_power_cnt = 0;
6593 i_mdi_rele_all_phci(ct);
6594 ct->ct_powercnt_config = 0;
6595 ct->ct_powercnt_unconfig = 0;
6596 ct->ct_powercnt_reset = 1;
6597 }
6598
6599 static int
6600 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6601 {
6602 int ret;
6603 dev_info_t *ph_dip;
6604
6605 MDI_PI_LOCK(pip);
6606 i_mdi_pm_hold_pip(pip);
6607
6608 ph_dip = mdi_pi_get_phci(pip);
6609 MDI_PI_UNLOCK(pip);
6610
6611 /* bring all components of phci to full power */
6612 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6613 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6614 ddi_get_instance(ph_dip), (void *)pip));
6615
6616 ret = pm_powerup(ph_dip);
6617
6618 if (ret == DDI_FAILURE) {
6619 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6620 "pm_powerup FAILED for %s%d %p",
6621 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6622 (void *)pip));
6623
6624 MDI_PI_LOCK(pip);
6625 i_mdi_pm_rele_pip(pip);
6626 MDI_PI_UNLOCK(pip);
6627 return (MDI_FAILURE);
6628 }
6629
6630 return (MDI_SUCCESS);
6631 }
6632
/*
 * Power up every usable pHCI path of the client.  Returns MDI_SUCCESS
 * if at least one path was powered up, MDI_FAILURE otherwise.  Caller
 * must hold the client lock; it is dropped and reacquired around each
 * (blocking) powerup, with the path held across the gap.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t  *pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		/*
		 * NOTE(review): the client lock was dropped above, so
		 * presumably the path hold is what keeps pi_client_link
		 * valid here — confirm before changing the traversal.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}
6662
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save CPU cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		/* anything else is handled by the generic PM framework */
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* first transition: make sure the pHCIs are powered */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new power state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might have started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* the serialized transition (see pre-notification) is over */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level raised: power pHCIs (if needed) and take holds */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* dropped to level 0 from a known level: release holds */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
6820
6821 static int
6822 i_mdi_pm_pre_config_one(dev_info_t *child)
6823 {
6824 int ret = MDI_SUCCESS;
6825 mdi_client_t *ct;
6826
6827 ct = i_devi_get_client(child);
6828 if (ct == NULL)
6829 return (MDI_FAILURE);
6830
6831 MDI_CLIENT_LOCK(ct);
6832 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6833 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6834
6835 if (!MDI_CLIENT_IS_FAILED(ct)) {
6836 MDI_CLIENT_UNLOCK(ct);
6837 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6838 return (MDI_SUCCESS);
6839 }
6840
6841 if (ct->ct_powercnt_config) {
6842 MDI_CLIENT_UNLOCK(ct);
6843 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6844 return (MDI_SUCCESS);
6845 }
6846
6847 if (ct->ct_power_cnt == 0) {
6848 ret = i_mdi_power_all_phci(ct);
6849 }
6850 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6851 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6852 ct->ct_powercnt_config = 1;
6853 ct->ct_powercnt_reset = 0;
6854 MDI_CLIENT_UNLOCK(ct);
6855 return (ret);
6856 }
6857
6858 static int
6859 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6860 {
6861 int ret = MDI_SUCCESS;
6862 dev_info_t *cdip;
6863 int circ;
6864
6865 ASSERT(MDI_VHCI(vdip));
6866
6867 /* ndi_devi_config_one */
6868 if (child) {
6869 ASSERT(DEVI_BUSY_OWNED(vdip));
6870 return (i_mdi_pm_pre_config_one(child));
6871 }
6872
6873 /* devi_config_common */
6874 ndi_devi_enter(vdip, &circ);
6875 cdip = ddi_get_child(vdip);
6876 while (cdip) {
6877 dev_info_t *next = ddi_get_next_sibling(cdip);
6878
6879 ret = i_mdi_pm_pre_config_one(cdip);
6880 if (ret != MDI_SUCCESS)
6881 break;
6882 cdip = next;
6883 }
6884 ndi_devi_exit(vdip, circ);
6885 return (ret);
6886 }
6887
/*
 * Pre-unconfig PM handling for a single client: power the pHCI paths
 * and take PM holds so the detach path can run, recording the hold in
 * ct_powercnt_unconfig.  '*held' is set to 1 whenever a hold is (or
 * already was) in place, so the caller can balance it in post-unconfig.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait out any power transition in progress */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(child)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/*
	 * Auto-detach of a powered-down client would needlessly power
	 * it back up; refuse it.
	 */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/* holds already taken by an earlier pre-unconfig */
	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}
6934
6935 static int
6936 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6937 int flags)
6938 {
6939 int ret = MDI_SUCCESS;
6940 dev_info_t *cdip;
6941 int circ;
6942
6943 ASSERT(MDI_VHCI(vdip));
6944 *held = 0;
6945
6946 /* ndi_devi_unconfig_one */
6947 if (child) {
6948 ASSERT(DEVI_BUSY_OWNED(vdip));
6949 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6950 }
6951
6952 /* devi_unconfig_common */
6953 ndi_devi_enter(vdip, &circ);
6954 cdip = ddi_get_child(vdip);
6955 while (cdip) {
6956 dev_info_t *next = ddi_get_next_sibling(cdip);
6957
6958 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6959 cdip = next;
6960 }
6961 ndi_devi_exit(vdip, circ);
6962
6963 if (*held)
6964 ret = MDI_SUCCESS;
6965
6966 return (ret);
6967 }
6968
/*
 * Post-config PM handling for a single client: balance the PM holds
 * taken in i_mdi_pm_pre_config_one().  If the client ended up powered
 * down or detached (config failed or raced with another thread), the
 * PM accounting is reset outright; otherwise one hold per currently
 * usable (online/standby) path is released.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any power transition in progress */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-config hold to balance */
	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release one hold per online/standby path */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}
7021
7022 static void
7023 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7024 {
7025 int circ;
7026 dev_info_t *cdip;
7027
7028 ASSERT(MDI_VHCI(vdip));
7029
7030 /* ndi_devi_config_one */
7031 if (child) {
7032 ASSERT(DEVI_BUSY_OWNED(vdip));
7033 i_mdi_pm_post_config_one(child);
7034 return;
7035 }
7036
7037 /* devi_config_common */
7038 ndi_devi_enter(vdip, &circ);
7039 cdip = ddi_get_child(vdip);
7040 while (cdip) {
7041 dev_info_t *next = ddi_get_next_sibling(cdip);
7042
7043 i_mdi_pm_post_config_one(cdip);
7044 cdip = next;
7045 }
7046 ndi_devi_exit(vdip, circ);
7047 }
7048
/*
 * Post-unconfig PM handling for a single client: balance the PM holds
 * taken in i_mdi_pm_pre_unconfig_one().  If the detach failed (node
 * still attached but powered down) or the node is gone and not being
 * re-attached, the PM accounting is reset; otherwise one hold per
 * currently usable (online/standby) path is released.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any power transition in progress */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-unconfig hold to balance */
	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		/*
		 * i_mdi_pm_reset_client() also clears
		 * ct_powercnt_unconfig, which is why only the else
		 * branch clears it explicitly.
		 */
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release one hold per online/standby path */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}
7095
7096 static void
7097 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7098 {
7099 int circ;
7100 dev_info_t *cdip;
7101
7102 ASSERT(MDI_VHCI(vdip));
7103
7104 if (!held) {
7105 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7106 return;
7107 }
7108
7109 if (child) {
7110 ASSERT(DEVI_BUSY_OWNED(vdip));
7111 i_mdi_pm_post_unconfig_one(child);
7112 return;
7113 }
7114
7115 ndi_devi_enter(vdip, &circ);
7116 cdip = ddi_get_child(vdip);
7117 while (cdip) {
7118 dev_info_t *next = ddi_get_next_sibling(cdip);
7119
7120 i_mdi_pm_post_unconfig_one(cdip);
7121 cdip = next;
7122 }
7123 ndi_devi_exit(vdip, circ);
7124 }
7125
/*
 * MDI power-management dispatch for the vHCI: routes the pre/post
 * config/unconfig hooks and the explicit HOLD/RELE operations to the
 * appropriate helpers.  Returns MDI_SUCCESS for operations that have
 * no meaningful status.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int			circ, ret = MDI_SUCCESS;
	dev_info_t		*client_dip = NULL;
	mdi_client_t		*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		/* 'circ' is only initialized (and exited) on this path */
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (MDI_NOTE, vdip,
	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		/* 'args' carries the held flag out to the caller */
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		/* here 'args' is the client dev_info_t itself */
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			/* only take holds on the first transition */
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(client_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}
7207
7208 int
7209 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7210 {
7211 mdi_vhci_t *vhci;
7212
7213 if (!MDI_VHCI(dip))
7214 return (MDI_FAILURE);
7215
7216 if (mdi_class) {
7217 vhci = DEVI(dip)->devi_mdi_xhci;
7218 ASSERT(vhci);
7219 *mdi_class = vhci->vh_class;
7220 }
7221
7222 return (MDI_SUCCESS);
7223 }
7224
7225 int
7226 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7227 {
7228 mdi_phci_t *phci;
7229
7230 if (!MDI_PHCI(dip))
7231 return (MDI_FAILURE);
7232
7233 if (mdi_class) {
7234 phci = DEVI(dip)->devi_mdi_xhci;
7235 ASSERT(phci);
7236 *mdi_class = phci->ph_vhci->vh_class;
7237 }
7238
7239 return (MDI_SUCCESS);
7240 }
7241
7242 int
7243 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7244 {
7245 mdi_client_t *client;
7246
7247 if (!MDI_CLIENT(dip))
7248 return (MDI_FAILURE);
7249
7250 if (mdi_class) {
7251 client = DEVI(dip)->devi_mdi_client;
7252 ASSERT(client);
7253 *mdi_class = client->ct_vhci->vh_class;
7254 }
7255
7256 return (MDI_SUCCESS);
7257 }
7258
7259 void *
7260 mdi_client_get_vhci_private(dev_info_t *dip)
7261 {
7262 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7263 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7264 mdi_client_t *ct;
7265 ct = i_devi_get_client(dip);
7266 return (ct->ct_vprivate);
7267 }
7268 return (NULL);
7269 }
7270
7271 void
7272 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7273 {
7274 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7275 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7276 mdi_client_t *ct;
7277 ct = i_devi_get_client(dip);
7278 ct->ct_vprivate = data;
7279 }
7280 }
7281 /*
7282 * mdi_pi_get_vhci_private():
7283 * Get the vhci private information associated with the
7284 * mdi_pathinfo node
7285 */
7286 void *
7287 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7288 {
7289 caddr_t vprivate = NULL;
7290 if (pip) {
7291 vprivate = MDI_PI(pip)->pi_vprivate;
7292 }
7293 return (vprivate);
7294 }
7295
7296 /*
7297 * mdi_pi_set_vhci_private():
7298 * Set the vhci private information in the mdi_pathinfo node
7299 */
7300 void
7301 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7302 {
7303 if (pip) {
7304 MDI_PI(pip)->pi_vprivate = priv;
7305 }
7306 }
7307
7308 /*
7309 * mdi_phci_get_vhci_private():
7310 * Get the vhci private information associated with the
7311 * mdi_phci node
7312 */
7313 void *
7314 mdi_phci_get_vhci_private(dev_info_t *dip)
7315 {
7316 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7317 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7318 mdi_phci_t *ph;
7319 ph = i_devi_get_phci(dip);
7320 return (ph->ph_vprivate);
7321 }
7322 return (NULL);
7323 }
7324
7325 /*
7326 * mdi_phci_set_vhci_private():
7327 * Set the vhci private information in the mdi_phci node
7328 */
7329 void
7330 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7331 {
7332 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7333 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7334 mdi_phci_t *ph;
7335 ph = i_devi_get_phci(dip);
7336 ph->ph_vprivate = priv;
7337 }
7338 }
7339
/* Return non-zero if the pathinfo node has its HIDDEN flag set */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
}
7345
/* Return non-zero if the pathinfo node has its DEVICE_REMOVED flag set */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
}
7351
7352 /* Return 1 if all client paths are device_removed */
7353 static int
7354 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7355 {
7356 mdi_pathinfo_t *pip;
7357 int all_devices_removed = 1;
7358
7359 MDI_CLIENT_LOCK(ct);
7360 for (pip = ct->ct_path_head; pip;
7361 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7362 if (!mdi_pi_device_isremoved(pip)) {
7363 all_devices_removed = 0;
7364 break;
7365 }
7366 }
7367 MDI_CLIENT_UNLOCK(ct);
7368 return (all_devices_removed);
7369 }
7370
7371 /*
7372 * When processing path hotunplug, represent device removal.
7373 */
7374 int
7375 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7376 {
7377 mdi_client_t *ct;
7378
7379 MDI_PI_LOCK(pip);
7380 if (mdi_pi_device_isremoved(pip)) {
7381 MDI_PI_UNLOCK(pip);
7382 return (0);
7383 }
7384 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7385 MDI_PI_FLAGS_SET_HIDDEN(pip);
7386 MDI_PI_UNLOCK(pip);
7387
7388 /*
7389 * If all paths associated with the client are now DEVICE_REMOVED,
7390 * reflect DEVICE_REMOVED in the client.
7391 */
7392 ct = MDI_PI(pip)->pi_client;
7393 if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7394 (void) ndi_devi_device_remove(ct->ct_dip);
7395 else
7396 i_ddi_di_cache_invalidate();
7397
7398 return (1);
7399 }
7400
7401 /*
7402 * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7403 * is now accessible then this interfaces is used to represent device insertion.
7404 */
7405 int
7406 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7407 {
7408 MDI_PI_LOCK(pip);
7409 if (!mdi_pi_device_isremoved(pip)) {
7410 MDI_PI_UNLOCK(pip);
7411 return (0);
7412 }
7413 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7414 MDI_PI_FLAGS_CLR_HIDDEN(pip);
7415 MDI_PI_UNLOCK(pip);
7416
7417 i_ddi_di_cache_invalidate();
7418
7419 return (1);
7420 }
7421
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/*
 * On-disk vhci cache format version and the nvpair names used in the
 * cache nvlist; see mainnvl_to_vhcache() for the full nvlist layout.
 */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7442
7443 /*
7444 * Given vhci class name, return its on-disk vhci cache filename.
7445 * Memory for the returned filename which includes the full path is allocated
7446 * by this function.
7447 */
7448 static char *
7449 vhclass2vhcache_filename(char *vhclass)
7450 {
7451 char *filename;
7452 int len;
7453 static char *fmt = "/etc/devices/mdi_%s_cache";
7454
7455 /*
7456 * fmt contains the on-disk vhci cache file name format;
7457 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7458 */
7459
7460 /* the -1 below is to account for "%s" in the format string */
7461 len = strlen(fmt) + strlen(vhclass) - 1;
7462 filename = kmem_alloc(len, KM_SLEEP);
7463 (void) snprintf(filename, len, fmt, vhclass);
7464 ASSERT(len == (strlen(filename) + 1));
7465 return (filename);
7466 }
7467
/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	/* config structure is freed in destroy_vhci_cache() */
	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 * Claim (and clear) the pre-read nvlist for this vhci class, if any.
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			/* version mismatch or unparsable contents */
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush a dirty cache before the filesystems are unmounted */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}
7538
/*
 * free all vhci cache related resources
 * Returns MDI_FAILURE (leaving everything intact) if the async threads
 * could not be stopped; MDI_SUCCESS otherwise.
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	/* all async cache activity must be quiesced before teardown */
	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	/*
	 * The hash was created with null key/value destructors (see
	 * setup_vhci_cache()), so the cct structures it references are
	 * freed explicitly in the client loop below.
	 */
	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	/* free each client together with all its cached pathinfo nodes */
	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}
7581
/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	/* tell all workers to exit and wake anyone waiting on the cv */
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/*
	 * Poll until both the flush thread and all async client config
	 * threads have exited, dropping the lock while delaying.
	 */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	/* discard any async client config requests still queued */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	/*
	 * Final synchronous flush if the cache is dirty; re-mark it
	 * dirty if the flush fails so nothing is lost.
	 */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	/* remove the shutdown callback added by setup_vhci_cache() */
	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}
7627
/*
 * Stop vhci cache flush thread
 *
 * Registered via callb_add(CB_CL_UADMIN_PRE_VFS): runs at shutdown
 * before the filesystems go away, so a dirty cache is force-flushed.
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	/* ask the flush thread to exit and wake it */
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* poll until the flush thread is gone, dropping the lock to delay */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	/* force-flush (force_flag = 1) any remaining dirty state */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}
7656
7657 /*
7658 * Enqueue the vhcache phci (cphci) at the tail of the list
7659 */
7660 static void
7661 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7662 {
7663 cphci->cphci_next = NULL;
7664 if (vhcache->vhcache_phci_head == NULL)
7665 vhcache->vhcache_phci_head = cphci;
7666 else
7667 vhcache->vhcache_phci_tail->cphci_next = cphci;
7668 vhcache->vhcache_phci_tail = cphci;
7669 }
7670
7671 /*
7672 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7673 */
7674 static void
7675 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7676 mdi_vhcache_pathinfo_t *cpi)
7677 {
7678 cpi->cpi_next = NULL;
7679 if (cct->cct_cpi_head == NULL)
7680 cct->cct_cpi_head = cpi;
7681 else
7682 cct->cct_cpi_tail->cpi_next = cpi;
7683 cct->cct_cpi_tail = cpi;
7684 }
7685
7686 /*
7687 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7688 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7689 * flag set come at the beginning of the list. All cpis which have this
7690 * flag set come at the end of the list.
7691 */
7692 static void
7693 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7694 mdi_vhcache_pathinfo_t *newcpi)
7695 {
7696 mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7697
7698 if (cct->cct_cpi_head == NULL ||
7699 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7700 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7701 else {
7702 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7703 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7704 prev_cpi = cpi, cpi = cpi->cpi_next)
7705 ;
7706
7707 if (prev_cpi == NULL)
7708 cct->cct_cpi_head = newcpi;
7709 else
7710 prev_cpi->cpi_next = newcpi;
7711
7712 newcpi->cpi_next = cpi;
7713
7714 if (cpi == NULL)
7715 cct->cct_cpi_tail = newcpi;
7716 }
7717 }
7718
7719 /*
7720 * Enqueue the vhcache client (cct) at the tail of the list
7721 */
7722 static void
7723 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7724 mdi_vhcache_client_t *cct)
7725 {
7726 cct->cct_next = NULL;
7727 if (vhcache->vhcache_client_head == NULL)
7728 vhcache->vhcache_client_head = cct;
7729 else
7730 vhcache->vhcache_client_tail->cct_next = cct;
7731 vhcache->vhcache_client_tail = cct;
7732 }
7733
/*
 * Free an array of 'nelem' NUL-terminated strings plus the array itself.
 * NULL array or NULL elements are tolerated.
 */
static void
free_string_array(char **str, int nelem)
{
	int	idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
	}
	kmem_free(str, sizeof (char *) * nelem);
}
7747
/* Free a cached phci entry and its pathname string */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
	kmem_free(cphci, sizeof (*cphci));
}
7754
/* Free a cached pathinfo entry and its unit-address string */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
	kmem_free(cpi, sizeof (*cpi));
}
7761
/* Free a cached client entry and its "name@addr" string */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
	kmem_free(cct, sizeof (*cct));
}
7768
7769 static char *
7770 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7771 {
7772 char *name_addr;
7773 int len;
7774
7775 len = strlen(ct_name) + strlen(ct_addr) + 2;
7776 name_addr = kmem_alloc(len, KM_SLEEP);
7777 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7778
7779 if (ret_len)
7780 *ret_len = len;
7781 return (name_addr);
7782 }
7783
7784 /*
7785 * Copy the contents of paddrnvl to vhci cache.
7786 * paddrnvl nvlist contains path information for a vhci client.
7787 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7788 */
7789 static void
7790 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7791 mdi_vhcache_client_t *cct)
7792 {
7793 nvpair_t *nvp = NULL;
7794 mdi_vhcache_pathinfo_t *cpi;
7795 uint_t nelem;
7796 uint32_t *val;
7797
7798 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7799 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7800 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7801 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7802 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7803 ASSERT(nelem == 2);
7804 cpi->cpi_cphci = cphci_list[val[0]];
7805 cpi->cpi_flags = val[1];
7806 enqueue_tail_vhcache_pathinfo(cct, cpi);
7807 }
7808 }
7809
7810 /*
7811 * Copy the contents of caddrmapnvl to vhci cache.
7812 * caddrmapnvl nvlist contains vhci client address to phci client address
7813 * mappings. See the comment in mainnvl_to_vhcache() for the format of
7814 * this nvlist.
7815 */
7816 static void
7817 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7818 mdi_vhcache_phci_t *cphci_list[])
7819 {
7820 nvpair_t *nvp = NULL;
7821 nvlist_t *paddrnvl;
7822 mdi_vhcache_client_t *cct;
7823
7824 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7825 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7826 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7827 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7828 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7829 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7830 /* the client must contain at least one path */
7831 ASSERT(cct->cct_cpi_head != NULL);
7832
7833 enqueue_vhcache_client(vhcache, cct);
7834 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7835 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7836 }
7837 }
7838
/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify pHCIs to which the
 * bus specific address belongs to. These integers are used as an index
 * into to the phcis string array in the main nvlist to get the pHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char **phcis, **phci_namep;
	uint_t nphcis;
	mdi_vhcache_phci_t *cphci, **cphci_list;
	nvlist_t *caddrmapnvl;
	int32_t ver;
	int i;
	size_t cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject a missing or mismatched on-disk format version */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* a cache with no phcis is valid; nothing to populate */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/*
	 * Build the cached phci list and an index->cphci array so the
	 * (phci-id, cpi_flags) pairs in the client map can be resolved.
	 */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}
7915
7916 /*
7917 * Build paddrnvl for the specified client using the information in the
7918 * vhci cache and add it to the caddrmapnnvl.
7919 * Returns 0 on success, errno on failure.
7920 */
7921 static int
7922 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7923 nvlist_t *caddrmapnvl)
7924 {
7925 mdi_vhcache_pathinfo_t *cpi;
7926 nvlist_t *nvl;
7927 int err;
7928 uint32_t val[2];
7929
7930 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7931
7932 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7933 return (err);
7934
7935 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7936 val[0] = cpi->cpi_cphci->cphci_id;
7937 val[1] = cpi->cpi_flags;
7938 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7939 != 0)
7940 goto out;
7941 }
7942
7943 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7944 out:
7945 nvlist_free(nvl);
7946 return (err);
7947 }
7948
7949 /*
7950 * Build caddrmapnvl using the information in the vhci cache
7951 * and add it to the mainnvl.
7952 * Returns 0 on success, errno on failure.
7953 */
7954 static int
7955 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7956 {
7957 mdi_vhcache_client_t *cct;
7958 nvlist_t *nvl;
7959 int err;
7960
7961 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7962
7963 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7964 return (err);
7965
7966 for (cct = vhcache->vhcache_client_head; cct != NULL;
7967 cct = cct->cct_next) {
7968 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7969 goto out;
7970 }
7971
7972 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7973 out:
7974 nvlist_free(nvl);
7975 return (err);
7976 }
7977
/*
 * Build nvlist using the information in the vhci cache.
 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
 * Returns nvl on success, NULL on failure.
 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
	mdi_vhcache_phci_t *cphci;
	uint_t phci_count;
	char **phcis;
	nvlist_t *nvl;
	int err, i;

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
		nvl = NULL;
		goto out;
	}

	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
	    MDI_VHCI_CACHE_VERSION)) != 0)
		goto out;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		/* empty cache: a version-only nvlist is a valid result */
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	/*
	 * Number the cached phcis; these ids are what the per-client
	 * (phci-id, cpi_flags) pairs refer to.
	 */
	phci_count = 0;
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next)
		cphci->cphci_id = phci_count++;

	/* build phci pathname list */
	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
	    cphci = cphci->cphci_next, i++)
		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);

	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
	    phci_count);
	free_string_array(phcis, phci_count);

	if (err == 0 &&
	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	rw_exit(&vhcache->vhcache_lock);
out:
	/* failure path: free whatever was built and return NULL */
	nvlist_free(nvl);
	return (NULL);
}
8033
8034 /*
8035 * Lookup vhcache phci structure for the specified phci path.
8036 */
8037 static mdi_vhcache_phci_t *
8038 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8039 {
8040 mdi_vhcache_phci_t *cphci;
8041
8042 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8043
8044 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8045 cphci = cphci->cphci_next) {
8046 if (strcmp(cphci->cphci_path, phci_path) == 0)
8047 return (cphci);
8048 }
8049
8050 return (NULL);
8051 }
8052
8053 /*
8054 * Lookup vhcache phci structure for the specified phci.
8055 */
8056 static mdi_vhcache_phci_t *
8057 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8058 {
8059 mdi_vhcache_phci_t *cphci;
8060
8061 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8062
8063 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8064 cphci = cphci->cphci_next) {
8065 if (cphci->cphci_phci == ph)
8066 return (cphci);
8067 }
8068
8069 return (NULL);
8070 }
8071
/*
 * Add the specified phci to the vhci cache if not already present.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;
	char *pathname;
	int cache_updated;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(ph->ph_dip, pathname);
	/*
	 * If an entry with this pathname already exists (typically read
	 * from the on-disk cache), just attach the live phci to it.
	 */
	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
	    != NULL) {
		cphci->cphci_phci = ph;
		cache_updated = 0;
	} else {
		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
		cphci->cphci_phci = ph;
		enqueue_vhcache_phci(vhcache, cphci);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Since a new phci has been added, reset
	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
	 * during next vhcache_discover_paths().
	 */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = 0;
	mutex_exit(&vhc->vhc_lock);

	kmem_free(pathname, MAXPATHLEN);
	/* schedule an on-disk update only if a new entry was created */
	if (cache_updated)
		vhcache_dirty(vhc);
}
8114
8115 /*
8116 * Remove the reference to the specified phci from the vhci cache.
8117 */
8118 static void
8119 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8120 {
8121 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8122 mdi_vhcache_phci_t *cphci;
8123
8124 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8125 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8126 /* do not remove the actual mdi_vhcache_phci structure */
8127 cphci->cphci_phci = NULL;
8128 }
8129 rw_exit(&vhcache->vhcache_lock);
8130 }
8131
8132 static void
8133 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8134 mdi_vhcache_lookup_token_t *src)
8135 {
8136 if (src == NULL) {
8137 dst->lt_cct = NULL;
8138 dst->lt_cct_lookup_time = 0;
8139 } else {
8140 dst->lt_cct = src->lt_cct;
8141 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8142 }
8143 }
8144
/*
 * Look up vhcache client for the specified client.
 *
 * 'token', if non-NULL, caches the result of the previous lookup so that
 * repeated lookups of the same client can skip the hash search.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t hv;
	char *name_addr;
	int len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		if (token) {
			/* remember both the hit and when it was made */
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = ddi_get_lbolt64();
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}
8185
/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/*
	 * Look for an existing cached path matching this pip's phci and
	 * unit address.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				/*
				 * The path exists again: clear the hint
				 * and restore the existing-paths-first
				 * ordering of the list.
				 */
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	/* no cached entry for this path; create one */
	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	/* schedule an on-disk update only if something changed */
	if (cache_updated)
		vhcache_dirty(vhc);
}
8244
8245 /*
8246 * Remove the reference to the specified path from the vhci cache.
8247 */
8248 static void
8249 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8250 {
8251 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8252 mdi_client_t *ct = pip->pi_client;
8253 mdi_vhcache_client_t *cct;
8254 mdi_vhcache_pathinfo_t *cpi;
8255
8256 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8257 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8258 NULL)) != NULL) {
8259 for (cpi = cct->cct_cpi_head; cpi != NULL;
8260 cpi = cpi->cpi_next) {
8261 if (cpi->cpi_pip == pip) {
8262 cpi->cpi_pip = NULL;
8263 break;
8264 }
8265 }
8266 }
8267 rw_exit(&vhcache->vhcache_lock);
8268 }
8269
/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	/* serialize the cache into an nvlist and write it out */
	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/*
			 * Read-only filesystem: stop trying to flush
			 * (clear the dirty/error state) but do not treat
			 * this as a failure.
			 */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* warn only once per run of consecutive failures */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		/* a previously reported failure has cleared; say so */
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}
8320
8321 /*
8322 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8323 * Exits itself if left idle for the idle timeout period.
8324 */
8325 static void
8326 vhcache_flush_thread(void *arg)
8327 {
8328 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8329 clock_t idle_time, quit_at_ticks;
8330 callb_cpr_t cprinfo;
8331
8332 /* number of seconds to sleep idle before exiting */
8333 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
8334
8335 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8336 "mdi_vhcache_flush");
8337 mutex_enter(&vhc->vhc_lock);
8338 for (; ; ) {
8339 while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8340 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
8341 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
8342 CALLB_CPR_SAFE_BEGIN(&cprinfo);
8343 (void) cv_timedwait(&vhc->vhc_cv,
8344 &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
8345 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8346 } else {
8347 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
8348 mutex_exit(&vhc->vhc_lock);
8349
8350 if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
8351 vhcache_dirty(vhc);
8352
8353 mutex_enter(&vhc->vhc_lock);
8354 }
8355 }
8356
8357 quit_at_ticks = ddi_get_lbolt() + idle_time;
8358
8359 while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8360 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
8361 ddi_get_lbolt() < quit_at_ticks) {
8362 CALLB_CPR_SAFE_BEGIN(&cprinfo);
8363 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8364 quit_at_ticks);
8365 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8366 }
8367
8368 if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8369 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
8370 goto out;
8371 }
8372
8373 out:
8374 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
8375 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8376 CALLB_CPR_EXIT(&cprinfo);
8377 }
8378
8379 /*
8380 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8381 */
8382 static void
8383 vhcache_dirty(mdi_vhci_config_t *vhc)
8384 {
8385 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8386 int create_thread;
8387
8388 rw_enter(&vhcache->vhcache_lock, RW_READER);
8389 /* do not flush cache until the cache is fully built */
8390 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8391 rw_exit(&vhcache->vhcache_lock);
8392 return;
8393 }
8394 rw_exit(&vhcache->vhcache_lock);
8395
8396 mutex_enter(&vhc->vhc_lock);
8397 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8398 mutex_exit(&vhc->vhc_lock);
8399 return;
8400 }
8401
8402 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8403 vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8404 mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8405 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8406 cv_broadcast(&vhc->vhc_cv);
8407 create_thread = 0;
8408 } else {
8409 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8410 create_thread = 1;
8411 }
8412 mutex_exit(&vhc->vhc_lock);
8413
8414 if (create_thread)
8415 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8416 0, &p0, TS_RUN, minclsyspri);
8417 }
8418
8419 /*
8420 * phci bus config structure - one for for each phci bus config operation that
8421 * we initiate on behalf of a vhci.
8422 */
8423 typedef struct mdi_phci_bus_config_s {
8424 char *phbc_phci_path;
8425 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
8426 struct mdi_phci_bus_config_s *phbc_next;
8427 } mdi_phci_bus_config_t;
8428
/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;		/* protects vhbc_thr_count */
	kcondvar_t vhbc_cv;		/* signaled when thr_count reaches 0 */
	int vhbc_thr_count;		/* number of outstanding phci threads */
} mdi_vhci_bus_config_t;
8438
8439 /*
8440 * bus config the specified phci
8441 */
8442 static void
8443 bus_config_phci(void *arg)
8444 {
8445 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8446 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8447 dev_info_t *ph_dip;
8448
8449 /*
8450 * first configure all path components upto phci and then configure
8451 * the phci children.
8452 */
8453 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8454 != NULL) {
8455 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8456 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8457 (void) ndi_devi_config_driver(ph_dip,
8458 vhbc->vhbc_op_flags,
8459 vhbc->vhbc_op_major);
8460 } else
8461 (void) ndi_devi_config(ph_dip,
8462 vhbc->vhbc_op_flags);
8463
8464 /* release the hold that e_ddi_hold_devi_by_path() placed */
8465 ndi_rele_devi(ph_dip);
8466 }
8467
8468 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8469 kmem_free(phbc, sizeof (*phbc));
8470
8471 mutex_enter(&vhbc->vhbc_lock);
8472 vhbc->vhbc_thr_count--;
8473 if (vhbc->vhbc_thr_count == 0)
8474 cv_broadcast(&vhbc->vhbc_cv);
8475 mutex_exit(&vhbc->vhbc_lock);
8476 }
8477
8478 /*
8479 * Bus config all phcis associated with the vhci in parallel.
8480 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8481 */
8482 static void
8483 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8484 ddi_bus_config_op_t op, major_t maj)
8485 {
8486 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8487 mdi_vhci_bus_config_t *vhbc;
8488 mdi_vhcache_phci_t *cphci;
8489
8490 rw_enter(&vhcache->vhcache_lock, RW_READER);
8491 if (vhcache->vhcache_phci_head == NULL) {
8492 rw_exit(&vhcache->vhcache_lock);
8493 return;
8494 }
8495
8496 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8497
8498 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8499 cphci = cphci->cphci_next) {
8500 /* skip phcis that haven't attached before root is available */
8501 if (!modrootloaded && (cphci->cphci_phci == NULL))
8502 continue;
8503 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8504 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8505 KM_SLEEP);
8506 phbc->phbc_vhbusconfig = vhbc;
8507 phbc->phbc_next = phbc_head;
8508 phbc_head = phbc;
8509 vhbc->vhbc_thr_count++;
8510 }
8511 rw_exit(&vhcache->vhcache_lock);
8512
8513 vhbc->vhbc_op = op;
8514 vhbc->vhbc_op_major = maj;
8515 vhbc->vhbc_op_flags = NDI_NO_EVENT |
8516 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8517 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8518 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8519
8520 /* now create threads to initiate bus config on all phcis in parallel */
8521 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8522 phbc_next = phbc->phbc_next;
8523 if (mdi_mtc_off)
8524 bus_config_phci((void *)phbc);
8525 else
8526 (void) thread_create(NULL, 0, bus_config_phci, phbc,
8527 0, &p0, TS_RUN, minclsyspri);
8528 }
8529
8530 mutex_enter(&vhbc->vhbc_lock);
8531 /* wait until all threads exit */
8532 while (vhbc->vhbc_thr_count > 0)
8533 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8534 mutex_exit(&vhbc->vhbc_lock);
8535
8536 mutex_destroy(&vhbc->vhbc_lock);
8537 cv_destroy(&vhbc->vhbc_cv);
8538 kmem_free(vhbc, sizeof (*vhbc));
8539 }
8540
8541 /*
8542 * Single threaded version of bus_config_all_phcis()
8543 */
8544 static void
8545 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8546 ddi_bus_config_op_t op, major_t maj)
8547 {
8548 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8549
8550 single_threaded_vhconfig_enter(vhc);
8551 bus_config_all_phcis(vhcache, flags, op, maj);
8552 single_threaded_vhconfig_exit(vhc);
8553 }
8554
8555 /*
8556 * Perform BUS_CONFIG_ONE on the specified child of the phci.
8557 * The path includes the child component in addition to the phci path.
8558 */
8559 static int
8560 bus_config_one_phci_child(char *path)
8561 {
8562 dev_info_t *ph_dip, *child;
8563 char *devnm;
8564 int rv = MDI_FAILURE;
8565
8566 /* extract the child component of the phci */
8567 devnm = strrchr(path, '/');
8568 *devnm++ = '\0';
8569
8570 /*
8571 * first configure all path components upto phci and then
8572 * configure the phci child.
8573 */
8574 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8575 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8576 NDI_SUCCESS) {
8577 /*
8578 * release the hold that ndi_devi_config_one() placed
8579 */
8580 ndi_rele_devi(child);
8581 rv = MDI_SUCCESS;
8582 }
8583
8584 /* release the hold that e_ddi_hold_devi_by_path() placed */
8585 ndi_rele_devi(ph_dip);
8586 }
8587
8588 devnm--;
8589 *devnm = '/';
8590 return (rv);
8591 }
8592
8593 /*
8594 * Build a list of phci client paths for the specified vhci client.
8595 * The list includes only those phci client paths which aren't configured yet.
8596 */
8597 static mdi_phys_path_t *
8598 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8599 {
8600 mdi_vhcache_pathinfo_t *cpi;
8601 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8602 int config_path, len;
8603
8604 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8605 /*
8606 * include only those paths that aren't configured.
8607 */
8608 config_path = 0;
8609 if (cpi->cpi_pip == NULL)
8610 config_path = 1;
8611 else {
8612 MDI_PI_LOCK(cpi->cpi_pip);
8613 if (MDI_PI_IS_INIT(cpi->cpi_pip))
8614 config_path = 1;
8615 MDI_PI_UNLOCK(cpi->cpi_pip);
8616 }
8617
8618 if (config_path) {
8619 pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8620 len = strlen(cpi->cpi_cphci->cphci_path) +
8621 strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8622 pp->phys_path = kmem_alloc(len, KM_SLEEP);
8623 (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8624 cpi->cpi_cphci->cphci_path, ct_name,
8625 cpi->cpi_addr);
8626 pp->phys_path_next = NULL;
8627
8628 if (pp_head == NULL)
8629 pp_head = pp;
8630 else
8631 pp_tail->phys_path_next = pp;
8632 pp_tail = pp;
8633 }
8634 }
8635
8636 return (pp_head);
8637 }
8638
8639 /*
8640 * Free the memory allocated for phci client path list.
8641 */
8642 static void
8643 free_phclient_path_list(mdi_phys_path_t *pp_head)
8644 {
8645 mdi_phys_path_t *pp, *pp_next;
8646
8647 for (pp = pp_head; pp != NULL; pp = pp_next) {
8648 pp_next = pp->phys_path_next;
8649 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8650 kmem_free(pp, sizeof (*pp));
8651 }
8652 }
8653
8654 /*
8655 * Allocated async client structure and initialize with the specified values.
8656 */
8657 static mdi_async_client_config_t *
8658 alloc_async_client_config(char *ct_name, char *ct_addr,
8659 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8660 {
8661 mdi_async_client_config_t *acc;
8662
8663 acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8664 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8665 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8666 acc->acc_phclient_path_list_head = pp_head;
8667 init_vhcache_lookup_token(&acc->acc_token, tok);
8668 acc->acc_next = NULL;
8669 return (acc);
8670 }
8671
8672 /*
8673 * Free the memory allocated for the async client structure and their members.
8674 */
8675 static void
8676 free_async_client_config(mdi_async_client_config_t *acc)
8677 {
8678 if (acc->acc_phclient_path_list_head)
8679 free_phclient_path_list(acc->acc_phclient_path_list_head);
8680 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8681 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8682 kmem_free(acc, sizeof (*acc));
8683 }
8684
8685 /*
8686 * Sort vhcache pathinfos (cpis) of the specified client.
8687 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8688 * flag set come at the beginning of the list. All cpis which have this
8689 * flag set come at the end of the list.
8690 */
8691 static void
8692 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8693 {
8694 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8695
8696 cpi_head = cct->cct_cpi_head;
8697 cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8698 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8699 cpi_next = cpi->cpi_next;
8700 enqueue_vhcache_pathinfo(cct, cpi);
8701 }
8702 }
8703
8704 /*
8705 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8706 * every vhcache pathinfo of the specified client. If not adjust the flag
8707 * setting appropriately.
8708 *
8709 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8710 * on-disk vhci cache. So every time this flag is updated the cache must be
8711 * flushed.
8712 */
8713 static void
8714 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8715 mdi_vhcache_lookup_token_t *tok)
8716 {
8717 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8718 mdi_vhcache_client_t *cct;
8719 mdi_vhcache_pathinfo_t *cpi;
8720
8721 rw_enter(&vhcache->vhcache_lock, RW_READER);
8722 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8723 == NULL) {
8724 rw_exit(&vhcache->vhcache_lock);
8725 return;
8726 }
8727
8728 /*
8729 * to avoid unnecessary on-disk cache updates, first check if an
8730 * update is really needed. If no update is needed simply return.
8731 */
8732 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8733 if ((cpi->cpi_pip != NULL &&
8734 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8735 (cpi->cpi_pip == NULL &&
8736 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8737 break;
8738 }
8739 }
8740 if (cpi == NULL) {
8741 rw_exit(&vhcache->vhcache_lock);
8742 return;
8743 }
8744
8745 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8746 rw_exit(&vhcache->vhcache_lock);
8747 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8748 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8749 tok)) == NULL) {
8750 rw_exit(&vhcache->vhcache_lock);
8751 return;
8752 }
8753 }
8754
8755 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8756 if (cpi->cpi_pip != NULL)
8757 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8758 else
8759 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8760 }
8761 sort_vhcache_paths(cct);
8762
8763 rw_exit(&vhcache->vhcache_lock);
8764 vhcache_dirty(vhc);
8765 }
8766
8767 /*
8768 * Configure all specified paths of the client.
8769 */
8770 static void
8771 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8772 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8773 {
8774 mdi_phys_path_t *pp;
8775
8776 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8777 (void) bus_config_one_phci_child(pp->phys_path);
8778 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8779 }
8780
8781 /*
8782 * Dequeue elements from vhci async client config list and bus configure
8783 * their corresponding phci clients.
8784 */
8785 static void
8786 config_client_paths_thread(void *arg)
8787 {
8788 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8789 mdi_async_client_config_t *acc;
8790 clock_t quit_at_ticks;
8791 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8792 callb_cpr_t cprinfo;
8793
8794 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8795 "mdi_config_client_paths");
8796
8797 for (; ; ) {
8798 quit_at_ticks = ddi_get_lbolt() + idle_time;
8799
8800 mutex_enter(&vhc->vhc_lock);
8801 while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8802 vhc->vhc_acc_list_head == NULL &&
8803 ddi_get_lbolt() < quit_at_ticks) {
8804 CALLB_CPR_SAFE_BEGIN(&cprinfo);
8805 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8806 quit_at_ticks);
8807 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8808 }
8809
8810 if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8811 vhc->vhc_acc_list_head == NULL)
8812 goto out;
8813
8814 acc = vhc->vhc_acc_list_head;
8815 vhc->vhc_acc_list_head = acc->acc_next;
8816 if (vhc->vhc_acc_list_head == NULL)
8817 vhc->vhc_acc_list_tail = NULL;
8818 vhc->vhc_acc_count--;
8819 mutex_exit(&vhc->vhc_lock);
8820
8821 config_client_paths_sync(vhc, acc->acc_ct_name,
8822 acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8823 &acc->acc_token);
8824
8825 free_async_client_config(acc);
8826 }
8827
8828 out:
8829 vhc->vhc_acc_thrcount--;
8830 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8831 CALLB_CPR_EXIT(&cprinfo);
8832 }
8833
8834 /*
8835 * Arrange for all the phci client paths (pp_head) for the specified client
8836 * to be bus configured asynchronously by a thread.
8837 */
8838 static void
8839 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8840 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8841 {
8842 mdi_async_client_config_t *acc, *newacc;
8843 int create_thread;
8844
8845 if (pp_head == NULL)
8846 return;
8847
8848 if (mdi_mtc_off) {
8849 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8850 free_phclient_path_list(pp_head);
8851 return;
8852 }
8853
8854 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8855 ASSERT(newacc);
8856
8857 mutex_enter(&vhc->vhc_lock);
8858 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8859 if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8860 strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8861 free_async_client_config(newacc);
8862 mutex_exit(&vhc->vhc_lock);
8863 return;
8864 }
8865 }
8866
8867 if (vhc->vhc_acc_list_head == NULL)
8868 vhc->vhc_acc_list_head = newacc;
8869 else
8870 vhc->vhc_acc_list_tail->acc_next = newacc;
8871 vhc->vhc_acc_list_tail = newacc;
8872 vhc->vhc_acc_count++;
8873 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8874 cv_broadcast(&vhc->vhc_cv);
8875 create_thread = 0;
8876 } else {
8877 vhc->vhc_acc_thrcount++;
8878 create_thread = 1;
8879 }
8880 mutex_exit(&vhc->vhc_lock);
8881
8882 if (create_thread)
8883 (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8884 0, &p0, TS_RUN, minclsyspri);
8885 }
8886
8887 /*
8888 * Return number of online paths for the specified client.
8889 */
8890 static int
8891 nonline_paths(mdi_vhcache_client_t *cct)
8892 {
8893 mdi_vhcache_pathinfo_t *cpi;
8894 int online_count = 0;
8895
8896 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8897 if (cpi->cpi_pip != NULL) {
8898 MDI_PI_LOCK(cpi->cpi_pip);
8899 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8900 online_count++;
8901 MDI_PI_UNLOCK(cpi->cpi_pip);
8902 }
8903 }
8904
8905 return (online_count);
8906 }
8907
8908 /*
8909 * Bus configure all paths for the specified vhci client.
8910 * If at least one path for the client is already online, the remaining paths
8911 * will be configured asynchronously. Otherwise, it synchronously configures
8912 * the paths until at least one path is online and then rest of the paths
8913 * will be configured asynchronously.
8914 */
8915 static void
8916 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8917 {
8918 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8919 mdi_phys_path_t *pp_head, *pp;
8920 mdi_vhcache_client_t *cct;
8921 mdi_vhcache_lookup_token_t tok;
8922
8923 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8924
8925 init_vhcache_lookup_token(&tok, NULL);
8926
8927 if (ct_name == NULL || ct_addr == NULL ||
8928 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8929 == NULL ||
8930 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8931 rw_exit(&vhcache->vhcache_lock);
8932 return;
8933 }
8934
8935 /* if at least one path is online, configure the rest asynchronously */
8936 if (nonline_paths(cct) > 0) {
8937 rw_exit(&vhcache->vhcache_lock);
8938 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8939 return;
8940 }
8941
8942 rw_exit(&vhcache->vhcache_lock);
8943
8944 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8945 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8946 rw_enter(&vhcache->vhcache_lock, RW_READER);
8947
8948 if ((cct = lookup_vhcache_client(vhcache, ct_name,
8949 ct_addr, &tok)) == NULL) {
8950 rw_exit(&vhcache->vhcache_lock);
8951 goto out;
8952 }
8953
8954 if (nonline_paths(cct) > 0 &&
8955 pp->phys_path_next != NULL) {
8956 rw_exit(&vhcache->vhcache_lock);
8957 config_client_paths_async(vhc, ct_name, ct_addr,
8958 pp->phys_path_next, &tok);
8959 pp->phys_path_next = NULL;
8960 goto out;
8961 }
8962
8963 rw_exit(&vhcache->vhcache_lock);
8964 }
8965 }
8966
8967 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8968 out:
8969 free_phclient_path_list(pp_head);
8970 }
8971
/*
 * Serialize vhci configuration operations: block until no other thread
 * holds the MDI_VHC_SINGLE_THREADED flag, then claim it. Paired with
 * single_threaded_vhconfig_exit().
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}
8981
/*
 * Release the MDI_VHC_SINGLE_THREADED flag claimed by
 * single_threaded_vhconfig_enter() and wake any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}
8990
/* Built-in table entry describing a phci driver (see the lists below). */
typedef struct mdi_phci_driver_info {
	char *phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int phdriver_root_support;
} mdi_phci_driver_info_t;
8997
8998 /*
8999 * vhci class and root support capability of a phci driver can be
9000 * specified using ddi-vhci-class and ddi-no-root-support properties in the
9001 * phci driver.conf file. The built-in tables below contain this information
9002 * for those phci drivers whose driver.conf files don't yet contain this info.
9003 *
9004 * All phci drivers expect iscsi have root device support.
9005 */
9006 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
9007 { "fp", 1 },
9008 { "iscsi", 0 },
9009 { "ibsrp", 1 }
9010 };
9011
9012 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
9013
9014 static void *
9015 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9016 {
9017 void *new_ptr;
9018
9019 new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9020 if (old_ptr) {
9021 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9022 kmem_free(old_ptr, old_size);
9023 }
9024 return (new_ptr);
9025 }
9026
9027 static void
9028 add_to_phci_list(char ***driver_list, int **root_support_list,
9029 int *cur_elements, int *max_elements, char *driver_name, int root_support)
9030 {
9031 ASSERT(*cur_elements <= *max_elements);
9032 if (*cur_elements == *max_elements) {
9033 *max_elements += 10;
9034 *driver_list = mdi_realloc(*driver_list,
9035 sizeof (char *) * (*cur_elements),
9036 sizeof (char *) * (*max_elements));
9037 *root_support_list = mdi_realloc(*root_support_list,
9038 sizeof (int) * (*cur_elements),
9039 sizeof (int) * (*max_elements));
9040 }
9041 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9042 (*root_support_list)[*cur_elements] = root_support;
9043 (*cur_elements)++;
9044 }
9045
/*
 * Build the list of phci drivers associated with the specified vhci class.
 *
 * On return *driver_list is an array of *cur_elements driver-name strings
 * and *root_support_list is a parallel array of root-support flags; both
 * arrays have *max_elements allocated slots. The caller frees them (see
 * attach_phci_drivers()).
 */
static void
get_phci_driver_list(char *vhci_class, char ***driver_list,
    int **root_support_list, int *cur_elements, int *max_elements)
{
	mdi_phci_driver_info_t *st_driver_list, *p;
	int st_ndrivers, root_support, i, j, driver_conf_count;
	major_t m;
	struct devnames *dnp;
	ddi_prop_t *propp;

	*driver_list = NULL;
	*root_support_list = NULL;
	*cur_elements = 0;
	*max_elements = 0;

	/* add the phci drivers derived from the phci driver.conf files */
	for (m = 0; m < devcnt; m++) {
		dnp = &devnamesp[m];

		if (dnp->dn_flags & DN_PHCI_DRIVER) {
			LOCK_DEV_OPS(&dnp->dn_lock);
			/* only drivers whose ddi-vhci-class matches */
			if (dnp->dn_global_prop_ptr != NULL &&
			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
			    strcmp(propp->prop_val, vhci_class) == 0) {

				/*
				 * root support is the default; it is turned
				 * off by the ddi-no-root-support property
				 */
				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
				    &dnp->dn_global_prop_ptr->prop_list)
				    == NULL) ? 1 : 0;

				add_to_phci_list(driver_list, root_support_list,
				    cur_elements, max_elements, dnp->dn_name,
				    root_support);

				UNLOCK_DEV_OPS(&dnp->dn_lock);
			} else
				UNLOCK_DEV_OPS(&dnp->dn_lock);
		}
	}

	driver_conf_count = *cur_elements;

	/* add the phci drivers specified in the built-in tables */
	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
		st_driver_list = scsi_phci_driver_list;
		st_ndrivers = sizeof (scsi_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
		st_driver_list = ib_phci_driver_list;
		st_ndrivers = sizeof (ib_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else {
		st_driver_list = NULL;
		st_ndrivers = 0;
	}

	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
		/* add this phci driver if not already added before */
		for (j = 0; j < driver_conf_count; j++) {
			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
				break;
		}
		if (j == driver_conf_count) {
			add_to_phci_list(driver_list, root_support_list,
			    cur_elements, max_elements, p->phdriver_name,
			    p->phdriver_root_support);
		}
	}
}
9117
9118 /*
9119 * Attach the phci driver instances associated with the specified vhci class.
9120 * If root is mounted attach all phci driver instances.
9121 * If root is not mounted, attach the instances of only those phci
9122 * drivers that have the root support.
9123 */
9124 static void
9125 attach_phci_drivers(char *vhci_class)
9126 {
9127 char **driver_list, **p;
9128 int *root_support_list;
9129 int cur_elements, max_elements, i;
9130 major_t m;
9131
9132 get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9133 &cur_elements, &max_elements);
9134
9135 for (i = 0; i < cur_elements; i++) {
9136 if (modrootloaded || root_support_list[i]) {
9137 m = ddi_name_to_major(driver_list[i]);
9138 if (m != DDI_MAJOR_T_NONE &&
9139 ddi_hold_installed_driver(m))
9140 ddi_rele_driver(m);
9141 }
9142 }
9143
9144 if (driver_list) {
9145 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9146 kmem_free(*p, strlen(*p) + 1);
9147 kmem_free(driver_list, sizeof (char *) * max_elements);
9148 kmem_free(root_support_list, sizeof (int) * max_elements);
9149 }
9150 }
9151
9152 /*
9153 * Build vhci cache:
9154 *
9155 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9156 * the phci driver instances. During this process the cache gets built.
9157 *
9158 * Cache is built fully if the root is mounted.
9159 * If the root is not mounted, phci drivers that do not have root support
9160 * are not attached. As a result the cache is built partially. The entries
9161 * in the cache reflect only those phci drivers that have root support.
9162 */
9163 static int
9164 build_vhci_cache(mdi_vhci_t *vh)
9165 {
9166 mdi_vhci_config_t *vhc = vh->vh_config;
9167 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9168
9169 single_threaded_vhconfig_enter(vhc);
9170
9171 rw_enter(&vhcache->vhcache_lock, RW_READER);
9172 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9173 rw_exit(&vhcache->vhcache_lock);
9174 single_threaded_vhconfig_exit(vhc);
9175 return (0);
9176 }
9177 rw_exit(&vhcache->vhcache_lock);
9178
9179 attach_phci_drivers(vh->vh_class);
9180 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9181 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9182
9183 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9184 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9185 rw_exit(&vhcache->vhcache_lock);
9186
9187 single_threaded_vhconfig_exit(vhc);
9188 vhcache_dirty(vhc);
9189 return (1);
9190 }
9191
9192 /*
9193 * Determine if discovery of paths is needed.
9194 */
9195 static int
9196 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9197 {
9198 int rv = 1;
9199
9200 mutex_enter(&vhc->vhc_lock);
9201 if (i_ddi_io_initialized() == 0) {
9202 if (vhc->vhc_path_discovery_boot > 0) {
9203 vhc->vhc_path_discovery_boot--;
9204 goto out;
9205 }
9206 } else {
9207 if (vhc->vhc_path_discovery_postboot > 0) {
9208 vhc->vhc_path_discovery_postboot--;
9209 goto out;
9210 }
9211 }
9212
9213 /*
9214 * Do full path discovery at most once per mdi_path_discovery_interval.
9215 * This is to avoid a series of full path discoveries when opening
9216 * stale /dev/[r]dsk links.
9217 */
9218 if (mdi_path_discovery_interval != -1 &&
9219 ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9220 goto out;
9221
9222 rv = 0;
9223 out:
9224 mutex_exit(&vhc->vhc_lock);
9225 return (rv);
9226 }
9227
9228 /*
9229 * Discover all paths:
9230 *
9231 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9232 * driver instances. During this process all paths will be discovered.
9233 */
9234 static int
9235 vhcache_discover_paths(mdi_vhci_t *vh)
9236 {
9237 mdi_vhci_config_t *vhc = vh->vh_config;
9238 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9239 int rv = 0;
9240
9241 single_threaded_vhconfig_enter(vhc);
9242
9243 if (vhcache_do_discovery(vhc)) {
9244 attach_phci_drivers(vh->vh_class);
9245 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9246 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9247
9248 mutex_enter(&vhc->vhc_lock);
9249 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9250 mdi_path_discovery_interval * TICKS_PER_SECOND;
9251 mutex_exit(&vhc->vhc_lock);
9252 rv = 1;
9253 }
9254
9255 single_threaded_vhconfig_exit(vhc);
9256 return (rv);
9257 }
9258
9259 /*
9260 * Generic vhci bus config implementation:
9261 *
9262 * Parameters
9263 * vdip vhci dip
9264 * flags bus config flags
9265 * op bus config operation
9266 * The remaining parameters are bus config operation specific
9267 *
9268 * for BUS_CONFIG_ONE
9269 * arg pointer to name@addr
9270 * child upon successful return from this function, *child will be
9271 * set to the configured and held devinfo child node of vdip.
9272 * ct_addr pointer to client address (i.e. GUID)
9273 *
9274 * for BUS_CONFIG_DRIVER
9275 * arg major number of the driver
9276 * child and ct_addr parameters are ignored
9277 *
9278 * for BUS_CONFIG_ALL
9279 * arg, child, and ct_addr parameters are ignored
9280 *
9281 * Note that for the rest of the bus config operations, this function simply
9282 * calls the framework provided default bus config routine.
9283 */
9284 int
9285 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9286 void *arg, dev_info_t **child, char *ct_addr)
9287 {
9288 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9289 mdi_vhci_config_t *vhc = vh->vh_config;
9290 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9291 int rv = 0;
9292 int params_valid = 0;
9293 char *cp;
9294
9295 /*
9296 * To bus config vhcis we relay operation, possibly using another
9297 * thread, to phcis. The phci driver then interacts with MDI to cause
9298 * vhci child nodes to be enumerated under the vhci node. Adding a
9299 * vhci child requires an ndi_devi_enter of the vhci. Since another
9300 * thread may be adding the child, to avoid deadlock we can't wait
9301 * for the relayed operations to complete if we have already entered
9302 * the vhci node.
9303 */
9304 if (DEVI_BUSY_OWNED(vdip)) {
9305 MDI_DEBUG(2, (MDI_NOTE, vdip,
9306 "vhci dip is busy owned %p", (void *)vdip));
9307 goto default_bus_config;
9308 }
9309
9310 rw_enter(&vhcache->vhcache_lock, RW_READER);
9311 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9312 rw_exit(&vhcache->vhcache_lock);
9313 rv = build_vhci_cache(vh);
9314 rw_enter(&vhcache->vhcache_lock, RW_READER);
9315 }
9316
9317 switch (op) {
9318 case BUS_CONFIG_ONE:
9319 if (arg != NULL && ct_addr != NULL) {
9320 /* extract node name */
9321 cp = (char *)arg;
9322 while (*cp != '\0' && *cp != '@')
9323 cp++;
9324 if (*cp == '@') {
9325 params_valid = 1;
9326 *cp = '\0';
9327 config_client_paths(vhc, (char *)arg, ct_addr);
9328 /* config_client_paths() releases cache_lock */
9329 *cp = '@';
9330 break;
9331 }
9332 }
9333
9334 rw_exit(&vhcache->vhcache_lock);
9335 break;
9336
9337 case BUS_CONFIG_DRIVER:
9338 rw_exit(&vhcache->vhcache_lock);
9339 if (rv == 0)
9340 st_bus_config_all_phcis(vhc, flags, op,
9341 (major_t)(uintptr_t)arg);
9342 break;
9343
9344 case BUS_CONFIG_ALL:
9345 rw_exit(&vhcache->vhcache_lock);
9346 if (rv == 0)
9347 st_bus_config_all_phcis(vhc, flags, op, -1);
9348 break;
9349
9350 default:
9351 rw_exit(&vhcache->vhcache_lock);
9352 break;
9353 }
9354
9355
9356 default_bus_config:
9357 /*
9358 * All requested child nodes are enumerated under the vhci.
9359 * Now configure them.
9360 */
9361 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9362 NDI_SUCCESS) {
9363 return (MDI_SUCCESS);
9364 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9365 /* discover all paths and try configuring again */
9366 if (vhcache_discover_paths(vh) &&
9367 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9368 NDI_SUCCESS)
9369 return (MDI_SUCCESS);
9370 }
9371
9372 return (MDI_FAILURE);
9373 }
9374
9375 /*
9376 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9377 */
9378 static nvlist_t *
9379 read_on_disk_vhci_cache(char *vhci_class)
9380 {
9381 nvlist_t *nvl;
9382 int err;
9383 char *filename;
9384
9385 filename = vhclass2vhcache_filename(vhci_class);
9386
9387 if ((err = fread_nvlist(filename, &nvl)) == 0) {
9388 kmem_free(filename, strlen(filename) + 1);
9389 return (nvl);
9390 } else if (err == EIO)
9391 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9392 else if (err == EINVAL)
9393 cmn_err(CE_WARN,
9394 "%s: data file corrupted, will recreate", filename);
9395
9396 kmem_free(filename, strlen(filename) + 1);
9397 return (NULL);
9398 }
9399
9400 /*
9401 * Read on-disk vhci cache into nvlists for all vhci classes.
9402 * Called during booting by i_ddi_read_devices_files().
9403 */
9404 void
9405 mdi_read_devices_files(void)
9406 {
9407 int i;
9408
9409 for (i = 0; i < N_VHCI_CLASSES; i++)
9410 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9411 }
9412
9413 /*
9414 * Remove all stale entries from vhci cache.
9415 */
9416 static void
9417 clean_vhcache(mdi_vhci_config_t *vhc)
9418 {
9419 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9420 mdi_vhcache_phci_t *phci, *nxt_phci;
9421 mdi_vhcache_client_t *client, *nxt_client;
9422 mdi_vhcache_pathinfo_t *path, *nxt_path;
9423
9424 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9425
9426 client = vhcache->vhcache_client_head;
9427 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9428 for ( ; client != NULL; client = nxt_client) {
9429 nxt_client = client->cct_next;
9430
9431 path = client->cct_cpi_head;
9432 client->cct_cpi_head = client->cct_cpi_tail = NULL;
9433 for ( ; path != NULL; path = nxt_path) {
9434 nxt_path = path->cpi_next;
9435 if ((path->cpi_cphci->cphci_phci != NULL) &&
9436 (path->cpi_pip != NULL)) {
9437 enqueue_tail_vhcache_pathinfo(client, path);
9438 } else if (path->cpi_pip != NULL) {
9439 /* Not valid to have a path without a phci. */
9440 free_vhcache_pathinfo(path);
9441 }
9442 }
9443
9444 if (client->cct_cpi_head != NULL)
9445 enqueue_vhcache_client(vhcache, client);
9446 else {
9447 (void) mod_hash_destroy(vhcache->vhcache_client_hash,
9448 (mod_hash_key_t)client->cct_name_addr);
9449 free_vhcache_client(client);
9450 }
9451 }
9452
9453 phci = vhcache->vhcache_phci_head;
9454 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9455 for ( ; phci != NULL; phci = nxt_phci) {
9456
9457 nxt_phci = phci->cphci_next;
9458 if (phci->cphci_phci != NULL)
9459 enqueue_vhcache_phci(vhcache, phci);
9460 else
9461 free_vhcache_phci(phci);
9462 }
9463
9464 vhcache->vhcache_clean_time = ddi_get_lbolt64();
9465 rw_exit(&vhcache->vhcache_lock);
9466 vhcache_dirty(vhc);
9467 }
9468
9469 /*
9470 * Remove all stale entries from vhci cache.
9471 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9472 */
9473 void
9474 mdi_clean_vhcache(void)
9475 {
9476 mdi_vhci_t *vh;
9477
9478 mutex_enter(&mdi_mutex);
9479 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9480 vh->vh_refcnt++;
9481 mutex_exit(&mdi_mutex);
9482 clean_vhcache(vh->vh_config);
9483 mutex_enter(&mdi_mutex);
9484 vh->vh_refcnt--;
9485 }
9486 mutex_exit(&mdi_mutex);
9487 }
9488
9489 /*
9490 * mdi_vhci_walk_clients():
9491 * Walker routine to traverse client dev_info nodes
9492 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9493 * below the client, including nexus devices, which we dont want.
9494 * So we just traverse the immediate siblings, starting from 1st client.
9495 */
9496 void
9497 mdi_vhci_walk_clients(dev_info_t *vdip,
9498 int (*f)(dev_info_t *, void *), void *arg)
9499 {
9500 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9501 dev_info_t *cdip;
9502 mdi_client_t *ct;
9503
9504 MDI_VHCI_CLIENT_LOCK(vh);
9505 cdip = ddi_get_child(vdip);
9506 while (cdip) {
9507 ct = i_devi_get_client(cdip);
9508 MDI_CLIENT_LOCK(ct);
9509
9510 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9511 cdip = ddi_get_next_sibling(cdip);
9512 else
9513 cdip = NULL;
9514
9515 MDI_CLIENT_UNLOCK(ct);
9516 }
9517 MDI_VHCI_CLIENT_UNLOCK(vh);
9518 }
9519
9520 /*
9521 * mdi_vhci_walk_phcis():
9522 * Walker routine to traverse phci dev_info nodes
9523 */
9524 void
9525 mdi_vhci_walk_phcis(dev_info_t *vdip,
9526 int (*f)(dev_info_t *, void *), void *arg)
9527 {
9528 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9529 mdi_phci_t *ph, *next;
9530
9531 MDI_VHCI_PHCI_LOCK(vh);
9532 ph = vh->vh_phci_head;
9533 while (ph) {
9534 MDI_PHCI_LOCK(ph);
9535
9536 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9537 next = ph->ph_next;
9538 else
9539 next = NULL;
9540
9541 MDI_PHCI_UNLOCK(ph);
9542 ph = next;
9543 }
9544 MDI_VHCI_PHCI_UNLOCK(vh);
9545 }
9546
9547
9548 /*
9549 * mdi_walk_vhcis():
9550 * Walker routine to traverse vhci dev_info nodes
9551 */
9552 void
9553 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9554 {
9555 mdi_vhci_t *vh = NULL;
9556
9557 mutex_enter(&mdi_mutex);
9558 /*
9559 * Scan for already registered vhci
9560 */
9561 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9562 vh->vh_refcnt++;
9563 mutex_exit(&mdi_mutex);
9564 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9565 mutex_enter(&mdi_mutex);
9566 vh->vh_refcnt--;
9567 break;
9568 } else {
9569 mutex_enter(&mdi_mutex);
9570 vh->vh_refcnt--;
9571 }
9572 }
9573
9574 mutex_exit(&mdi_mutex);
9575 }
9576
9577 /*
9578 * i_mdi_log_sysevent():
9579 * Logs events for pickup by syseventd
9580 */
9581 static void
9582 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9583 {
9584 char *path_name;
9585 nvlist_t *attr_list;
9586
9587 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9588 KM_SLEEP) != DDI_SUCCESS) {
9589 goto alloc_failed;
9590 }
9591
9592 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9593 (void) ddi_pathname(dip, path_name);
9594
9595 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9596 ddi_driver_name(dip)) != DDI_SUCCESS) {
9597 goto error;
9598 }
9599
9600 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9601 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9602 goto error;
9603 }
9604
9605 if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9606 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9607 goto error;
9608 }
9609
9610 if (nvlist_add_string(attr_list, DDI_PATHNAME,
9611 path_name) != DDI_SUCCESS) {
9612 goto error;
9613 }
9614
9615 if (nvlist_add_string(attr_list, DDI_CLASS,
9616 ph_vh_class) != DDI_SUCCESS) {
9617 goto error;
9618 }
9619
9620 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9621 attr_list, NULL, DDI_SLEEP);
9622
9623 error:
9624 kmem_free(path_name, MAXPATHLEN);
9625 nvlist_free(attr_list);
9626 return;
9627
9628 alloc_failed:
9629 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9630 }
9631
9632 char **
9633 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers)
9634 {
9635 char **driver_list, **ret_driver_list = NULL;
9636 int *root_support_list;
9637 int cur_elements, max_elements;
9638
9639 get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9640 &cur_elements, &max_elements);
9641
9642
9643 if (driver_list) {
9644 kmem_free(root_support_list, sizeof (int) * max_elements);
9645 ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9646 * max_elements, sizeof (char *) * cur_elements);
9647 }
9648 *ndrivers = cur_elements;
9649
9650 return (ret_driver_list);
9651
9652 }
9653
9654 void
9655 mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9656 {
9657 char **p;
9658 int i;
9659
9660 if (driver_list) {
9661 for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9662 kmem_free(*p, strlen(*p) + 1);
9663 kmem_free(driver_list, sizeof (char *) * ndrivers);
9664 }
9665 }
9666
9667 /*
9668 * mdi_is_dev_supported():
9669 * function called by pHCI bus config operation to determine if a
9670 * device should be represented as a child of the vHCI or the
9671 * pHCI. This decision is made by the vHCI, using cinfo idenity
9672 * information passed by the pHCI - specifics of the cinfo
9673 * representation are by agreement between the pHCI and vHCI.
9674 * Return Values:
9675 * MDI_SUCCESS
9676 * MDI_FAILURE
9677 */
9678 int
9679 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9680 {
9681 mdi_vhci_t *vh;
9682
9683 ASSERT(class && pdip);
9684
9685 /*
9686 * For dev_supported, mdi_phci_register() must have established pdip as
9687 * a pHCI.
9688 *
9689 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9690 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9691 */
9692 if (!MDI_PHCI(pdip))
9693 return (MDI_FAILURE);
9694
9695 /* Return MDI_FAILURE if vHCI does not support asking the question. */
9696 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9697 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9698 return (MDI_FAILURE);
9699 }
9700
9701 /* Return vHCI answer */
9702 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9703 }
9704
9705 int
9706 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9707 {
9708 uint_t devstate = 0;
9709 dev_info_t *cdip;
9710
9711 if ((pip == NULL) || (dcp == NULL))
9712 return (MDI_FAILURE);
9713
9714 cdip = mdi_pi_get_client(pip);
9715
9716 switch (mdi_pi_get_state(pip)) {
9717 case MDI_PATHINFO_STATE_INIT:
9718 devstate = DEVICE_DOWN;
9719 break;
9720 case MDI_PATHINFO_STATE_ONLINE:
9721 devstate = DEVICE_ONLINE;
9722 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9723 devstate |= DEVICE_BUSY;
9724 break;
9725 case MDI_PATHINFO_STATE_STANDBY:
9726 devstate = DEVICE_ONLINE;
9727 break;
9728 case MDI_PATHINFO_STATE_FAULT:
9729 devstate = DEVICE_DOWN;
9730 break;
9731 case MDI_PATHINFO_STATE_OFFLINE:
9732 devstate = DEVICE_OFFLINE;
9733 break;
9734 default:
9735 ASSERT(MDI_PI(pip)->pi_state);
9736 }
9737
9738 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9739 return (MDI_FAILURE);
9740
9741 return (MDI_SUCCESS);
9742 }