NEX-3997 Kernel BAD TRAP type=d panic from di_mem_addr on bogus di_state_t structure
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
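
Editor's summary: the old di_close() rejected only negative minors, then looked up and freed di_states[m] before taking di_lock, and di_ioctl() merely ASSERTed that its slot was non-NULL. On a non-DEBUG kernel, an ioctl against a never-populated slot, or a close racing another thread, could therefore reach di_mem_addr() with a bogus di_state_t and take the BAD TRAP type=d (#gp on x86) named in the synopsis. The fix bounds-checks the minor in di_close(), claims the slot under di_lock before freeing the state, and turns the di_ioctl() assertion into an explicit ENXIO return. A hedged userland sketch of the claim-under-lock pattern follows the di_close() hunk below.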
--- old/usr/src/uts/common/io/devinfo.c
+++ new/usr/src/uts/common/io/devinfo.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * driver for accessing kernel devinfo tree.
28 28 */
29 29 #include <sys/types.h>
30 30 #include <sys/pathname.h>
31 31 #include <sys/debug.h>
32 32 #include <sys/autoconf.h>
33 33 #include <sys/vmsystm.h>
34 34 #include <sys/conf.h>
35 35 #include <sys/file.h>
36 36 #include <sys/kmem.h>
37 37 #include <sys/modctl.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/ddi.h>
40 40 #include <sys/sunddi.h>
41 41 #include <sys/sunldi_impl.h>
42 42 #include <sys/sunndi.h>
43 43 #include <sys/esunddi.h>
44 44 #include <sys/sunmdi.h>
45 45 #include <sys/ddi_impldefs.h>
46 46 #include <sys/ndi_impldefs.h>
47 47 #include <sys/mdi_impldefs.h>
48 48 #include <sys/devinfo_impl.h>
49 49 #include <sys/thread.h>
50 50 #include <sys/modhash.h>
51 51 #include <sys/bitmap.h>
52 52 #include <util/qsort.h>
53 53 #include <sys/disp.h>
54 54 #include <sys/kobj.h>
55 55 #include <sys/crc32.h>
56 56 #include <sys/ddi_hp.h>
57 57 #include <sys/ddi_hp_impl.h>
58 58 #include <sys/sysmacros.h>
59 59 #include <sys/list.h>
60 60
61 61
62 62 #ifdef DEBUG
63 63 static int di_debug;
64 64 #define dcmn_err(args) if (di_debug >= 1) cmn_err args
65 65 #define dcmn_err2(args) if (di_debug >= 2) cmn_err args
66 66 #define dcmn_err3(args) if (di_debug >= 3) cmn_err args
67 67 #else
68 68 #define dcmn_err(args) /* nothing */
69 69 #define dcmn_err2(args) /* nothing */
70 70 #define dcmn_err3(args) /* nothing */
71 71 #endif
72 72
73 73 /*
74 74 * We partition the space of devinfo minor nodes equally between the full and
75 75 * unprivileged versions of the driver. The even-numbered minor nodes are the
76 76 * full version, while the odd-numbered ones are the read-only version.
77 77 */
78 78 static int di_max_opens = 32;
79 79
80 80 static int di_prop_dyn = 1; /* enable dynamic property support */
81 81
82 82 #define DI_FULL_PARENT 0
83 83 #define DI_READONLY_PARENT 1
84 84 #define DI_NODE_SPECIES 2
85 85 #define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0)
86 86
87 87 #define IOC_IDLE 0 /* snapshot ioctl states */
88 88 #define IOC_SNAP 1 /* snapshot in progress */
89 89 #define IOC_DONE 2 /* snapshot done, but not copied out */
90 90 #define IOC_COPY 3 /* copyout in progress */
91 91
92 92 /*
93 93 * Keep max alignment so we can move snapshot to different platforms.
94 94 *
95 95 * NOTE: Most callers should rely on the di_checkmem return value
96 96 * being aligned, and reestablish *off_p with aligned value, instead
97 97 * of trying to align size of their allocations: this approach will
98 98 * minimize memory use.
99 99 */
100 100 #define DI_ALIGN(addr) ((addr + 7l) & ~7l)
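
For example, DI_ALIGN rounds an offset up to the next 8-byte boundary: DI_ALIGN(13) = (13 + 7) & ~7 = 16, while an already-aligned offset passes through unchanged: DI_ALIGN(16) = 16.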
101 101
102 102 /*
103 103 * To avoid wasting memory, make a linked list of memory chunks.
104 104 * Size of each chunk is buf_size.
105 105 */
106 106 struct di_mem {
107 107 struct di_mem *next; /* link to next chunk */
108 108 char *buf; /* contiguous kernel memory */
109 109 size_t buf_size; /* size of buf in bytes */
110 110 devmap_cookie_t cook; /* cookie from ddi_umem_alloc */
111 111 };
112 112
113 113 /*
114 114 * This is a stack for walking the tree without using recursion.
115 115 * When the devinfo tree height is above some small size, one
116 116 * gets watchdog resets on sun4m.
117 117 */
118 118 struct di_stack {
119 119 void *offset[MAX_TREE_DEPTH];
120 120 struct dev_info *dip[MAX_TREE_DEPTH];
121 121 int circ[MAX_TREE_DEPTH];
122 122 int depth; /* depth of current node to be copied */
123 123 };
124 124
125 125 #define TOP_OFFSET(stack) \
126 126 ((di_off_t *)(stack)->offset[(stack)->depth - 1])
127 127 #define TOP_NODE(stack) \
128 128 ((stack)->dip[(stack)->depth - 1])
129 129 #define PARENT_OFFSET(stack) \
130 130 ((di_off_t *)(stack)->offset[(stack)->depth - 2])
131 131 #define EMPTY_STACK(stack) ((stack)->depth == 0)
132 132 #define POP_STACK(stack) { \
133 133 ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
134 134 (stack)->circ[(stack)->depth - 1]); \
135 135 ((stack)->depth--); \
136 136 }
137 137 #define PUSH_STACK(stack, node, off_p) { \
138 138 ASSERT(node != NULL); \
139 139 ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
140 140 (stack)->dip[(stack)->depth] = (node); \
141 141 (stack)->offset[(stack)->depth] = (void *)(off_p); \
142 142 ((stack)->depth)++; \
143 143 }
144 144
145 145 #define DI_ALL_PTR(s) DI_ALL(di_mem_addr((s), 0))
146 146
147 147 /*
148 148 * With devfs, the device tree has no global locks. The device tree is
149 149 * dynamic and dips may come and go if they are not locked locally. Under
150 150 * these conditions, pointers are no longer reliable as unique IDs.
151 151 * Specifically, these pointers cannot be used as keys for hash tables
152 152 * as the same devinfo structure may be freed in one part of the tree only
153 153 * to be allocated as the structure for a different device in another
154 154 * part of the tree. This can happen if DR and the snapshot are
155 155 * happening concurrently.
156 156 * The following data structures act as keys for devinfo nodes and
157 157 * pathinfo nodes.
158 158 */
159 159
160 160 enum di_ktype {
161 161 DI_DKEY = 1,
162 162 DI_PKEY = 2
163 163 };
164 164
165 165 struct di_dkey {
166 166 dev_info_t *dk_dip;
167 167 major_t dk_major;
168 168 int dk_inst;
169 169 pnode_t dk_nodeid;
170 170 };
171 171
172 172 struct di_pkey {
173 173 mdi_pathinfo_t *pk_pip;
174 174 char *pk_path_addr;
175 175 dev_info_t *pk_client;
176 176 dev_info_t *pk_phci;
177 177 };
178 178
179 179 struct di_key {
180 180 enum di_ktype k_type;
181 181 union {
182 182 struct di_dkey dkey;
183 183 struct di_pkey pkey;
184 184 } k_u;
185 185 };
186 186
187 187
188 188 struct i_lnode;
189 189
190 190 typedef struct i_link {
191 191 /*
192 192 * If a di_link struct representing this i_link struct makes it
193 193 * into the snapshot, then self will point to the offset of
194 194 * the di_link struct in the snapshot
195 195 */
196 196 di_off_t self;
197 197
198 198 int spec_type; /* block or char access type */
199 199 struct i_lnode *src_lnode; /* src i_lnode */
200 200 struct i_lnode *tgt_lnode; /* tgt i_lnode */
201 201 struct i_link *src_link_next; /* next src i_link w/ same i_lnode */
202 202 struct i_link *tgt_link_next; /* next tgt i_link w/ same i_lnode */
203 203 } i_link_t;
204 204
205 205 typedef struct i_lnode {
206 206 /*
207 207 * If a di_lnode struct representing this i_lnode struct makes it
208 208 * into the snapshot, then self will point to the offset of
209 209 * the di_lnode struct in the snapshot
210 210 */
211 211 di_off_t self;
212 212
213 213 /*
214 214 * used for hashing and comparing i_lnodes
215 215 */
216 216 int modid;
217 217
218 218 /*
219 219 * public information describing a link endpoint
220 220 */
221 221 struct di_node *di_node; /* di_node in snapshot */
222 222 dev_t devt; /* devt */
223 223
224 224 /*
225 225 * i_link ptr to links coming into this i_lnode node
226 226 * (this i_lnode is the target of these i_links)
227 227 */
228 228 i_link_t *link_in;
229 229
230 230 /*
231 231 * i_link ptr to links going out of this i_lnode node
232 232 * (this i_lnode is the source of these i_links)
233 233 */
234 234 i_link_t *link_out;
235 235 } i_lnode_t;
236 236
237 237 typedef struct i_hp {
238 238 di_off_t hp_off; /* Offset of di_hp_t in snapshot */
239 239 dev_info_t *hp_child; /* Child devinfo node of the di_hp_t */
240 240 list_node_t hp_link; /* List linkage */
241 241 } i_hp_t;
242 242
243 243 /*
244 244 * Soft state associated with each instance of driver open.
245 245 */
246 246 static struct di_state {
247 247 di_off_t mem_size; /* total # bytes in memlist */
248 248 struct di_mem *memlist; /* head of memlist */
249 249 uint_t command; /* command from ioctl */
250 250 int di_iocstate; /* snapshot ioctl state */
251 251 mod_hash_t *reg_dip_hash;
252 252 mod_hash_t *reg_pip_hash;
253 253 int lnode_count;
254 254 int link_count;
255 255
256 256 mod_hash_t *lnode_hash;
257 257 mod_hash_t *link_hash;
258 258
259 259 list_t hp_list;
260 260 } **di_states;
261 261
262 262 static kmutex_t di_lock; /* serialize instance assignment */
263 263
264 264 typedef enum {
265 265 DI_QUIET = 0, /* DI_QUIET must always be 0 */
266 266 DI_ERR,
267 267 DI_INFO,
268 268 DI_TRACE,
269 269 DI_TRACE1,
270 270 DI_TRACE2
271 271 } di_cache_debug_t;
272 272
273 273 static uint_t di_chunk = 32; /* I/O chunk size in pages */
274 274
275 275 #define DI_CACHE_LOCK(c) (mutex_enter(&(c).cache_lock))
276 276 #define DI_CACHE_UNLOCK(c) (mutex_exit(&(c).cache_lock))
277 277 #define DI_CACHE_LOCKED(c) (mutex_owned(&(c).cache_lock))
278 278
279 279 /*
280 280 * Check that whole device tree is being configured as a pre-condition for
281 281 * cleaning up /etc/devices files.
282 282 */
283 283 #define DEVICES_FILES_CLEANABLE(st) \
284 284 (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
285 285 strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)
286 286
287 287 #define CACHE_DEBUG(args) \
288 288 { if (di_cache_debug != DI_QUIET) di_cache_print args; }
289 289
290 290 typedef struct phci_walk_arg {
291 291 di_off_t off;
292 292 struct di_state *st;
293 293 } phci_walk_arg_t;
294 294
295 295 static int di_open(dev_t *, int, int, cred_t *);
296 296 static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
297 297 static int di_close(dev_t, int, int, cred_t *);
298 298 static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
299 299 static int di_attach(dev_info_t *, ddi_attach_cmd_t);
300 300 static int di_detach(dev_info_t *, ddi_detach_cmd_t);
301 301
302 302 static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
303 303 static di_off_t di_snapshot_and_clean(struct di_state *);
304 304 static di_off_t di_copydevnm(di_off_t *, struct di_state *);
305 305 static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
306 306 static di_off_t di_copynode(struct dev_info *, struct di_stack *,
307 307 struct di_state *);
308 308 static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
309 309 struct di_state *);
310 310 static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
311 311 static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
312 312 static di_off_t di_gethpdata(ddi_hp_cn_handle_t *, di_off_t *,
313 313 struct di_state *);
314 314 static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
315 315 struct di_state *, struct dev_info *);
316 316 static void di_allocmem(struct di_state *, size_t);
317 317 static void di_freemem(struct di_state *);
318 318 static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
319 319 static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
320 320 static void *di_mem_addr(struct di_state *, di_off_t);
321 321 static int di_setstate(struct di_state *, int);
322 322 static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
323 323 static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
324 324 static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
325 325 struct di_state *, int);
326 326 static di_off_t di_getlink_data(di_off_t, struct di_state *);
327 327 static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);
328 328
329 329 static int cache_args_valid(struct di_state *st, int *error);
330 330 static int snapshot_is_cacheable(struct di_state *st);
331 331 static int di_cache_lookup(struct di_state *st);
332 332 static int di_cache_update(struct di_state *st);
333 333 static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
334 334 static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
335 335 static int build_phci_list(dev_info_t *ph_devinfo, void *arg);
336 336 static void di_hotplug_children(struct di_state *st);
337 337
338 338 extern int modrootloaded;
339 339 extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
340 340 extern void mdi_vhci_walk_phcis(dev_info_t *,
341 341 int (*)(dev_info_t *, void *), void *);
342 342
343 343
344 344 static struct cb_ops di_cb_ops = {
345 345 di_open, /* open */
346 346 di_close, /* close */
347 347 nodev, /* strategy */
348 348 nodev, /* print */
349 349 nodev, /* dump */
350 350 nodev, /* read */
351 351 nodev, /* write */
352 352 di_ioctl, /* ioctl */
353 353 nodev, /* devmap */
354 354 nodev, /* mmap */
355 355 nodev, /* segmap */
356 356 nochpoll, /* poll */
357 357 ddi_prop_op, /* prop_op */
358 358 NULL, /* streamtab */
359 359 D_NEW | D_MP /* Driver compatibility flag */
360 360 };
361 361
362 362 static struct dev_ops di_ops = {
363 363 DEVO_REV, /* devo_rev, */
364 364 0, /* refcnt */
365 365 di_info, /* info */
366 366 nulldev, /* identify */
367 367 nulldev, /* probe */
368 368 di_attach, /* attach */
369 369 di_detach, /* detach */
370 370 nodev, /* reset */
371 371 &di_cb_ops, /* driver operations */
372 372 NULL /* bus operations */
373 373 };
374 374
375 375 /*
376 376 * Module linkage information for the kernel.
377 377 */
378 378 static struct modldrv modldrv = {
379 379 &mod_driverops,
380 380 "DEVINFO Driver",
381 381 &di_ops
382 382 };
383 383
384 384 static struct modlinkage modlinkage = {
385 385 MODREV_1,
386 386 &modldrv,
387 387 NULL
388 388 };
389 389
390 390 int
391 391 _init(void)
392 392 {
393 393 int error;
394 394
395 395 mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
396 396
397 397 error = mod_install(&modlinkage);
398 398 if (error != 0) {
399 399 mutex_destroy(&di_lock);
400 400 return (error);
401 401 }
402 402
403 403 return (0);
404 404 }
405 405
406 406 int
407 407 _info(struct modinfo *modinfop)
408 408 {
409 409 return (mod_info(&modlinkage, modinfop));
410 410 }
411 411
412 412 int
413 413 _fini(void)
414 414 {
415 415 int error;
416 416
417 417 error = mod_remove(&modlinkage);
418 418 if (error != 0) {
419 419 return (error);
420 420 }
421 421
422 422 mutex_destroy(&di_lock);
423 423 return (0);
424 424 }
425 425
426 426 static dev_info_t *di_dip;
427 427
428 428 /*ARGSUSED*/
429 429 static int
430 430 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
431 431 {
432 432 int error = DDI_FAILURE;
433 433
434 434 switch (infocmd) {
435 435 case DDI_INFO_DEVT2DEVINFO:
436 436 *result = (void *)di_dip;
437 437 error = DDI_SUCCESS;
438 438 break;
439 439 case DDI_INFO_DEVT2INSTANCE:
440 440 /*
441 441 * All dev_t's map to the same, single instance.
442 442 */
443 443 *result = (void *)0;
444 444 error = DDI_SUCCESS;
445 445 break;
446 446 default:
447 447 break;
448 448 }
449 449
450 450 return (error);
451 451 }
452 452
453 453 static int
454 454 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
455 455 {
456 456 int error = DDI_FAILURE;
457 457
458 458 switch (cmd) {
459 459 case DDI_ATTACH:
460 460 di_states = kmem_zalloc(
461 461 di_max_opens * sizeof (struct di_state *), KM_SLEEP);
462 462
463 463 if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
464 464 DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
465 465 ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
466 466 DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
467 467 kmem_free(di_states,
468 468 di_max_opens * sizeof (struct di_state *));
469 469 ddi_remove_minor_node(dip, NULL);
470 470 error = DDI_FAILURE;
471 471 } else {
472 472 di_dip = dip;
473 473 ddi_report_dev(dip);
474 474
475 475 error = DDI_SUCCESS;
476 476 }
477 477 break;
478 478 default:
479 479 error = DDI_FAILURE;
480 480 break;
481 481 }
482 482
483 483 return (error);
484 484 }
485 485
486 486 static int
487 487 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
488 488 {
489 489 int error = DDI_FAILURE;
490 490
491 491 switch (cmd) {
492 492 case DDI_DETACH:
493 493 ddi_remove_minor_node(dip, NULL);
494 494 di_dip = NULL;
495 495 kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
496 496
497 497 error = DDI_SUCCESS;
498 498 break;
499 499 default:
500 500 error = DDI_FAILURE;
501 501 break;
502 502 }
503 503
504 504 return (error);
505 505 }
506 506
507 507 /*
508 508 * Allow multiple opens by tweaking the dev_t such that it looks like each
509 509 * open is getting a different minor device. Each minor gets a separate
510 510 * entry in the di_states[] table. Based on the original minor number, we
511 511 * discriminate opens of the full and read-only nodes. If all of the instances
512 512 * of the selected minor node are currently open, we return EAGAIN.
513 513 */
514 514 /*ARGSUSED*/
515 515 static int
516 516 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
517 517 {
518 518 int m;
519 519 minor_t minor_parent = getminor(*devp);
520 520
521 521 if (minor_parent != DI_FULL_PARENT &&
522 522 minor_parent != DI_READONLY_PARENT)
523 523 return (ENXIO);
524 524
525 525 mutex_enter(&di_lock);
526 526
527 527 for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
528 528 if (di_states[m] != NULL)
529 529 continue;
530 530
531 531 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
532 532 break; /* It's ours. */
533 533 }
534 534
535 535 if (m >= di_max_opens) {
536 536 /*
537 537 * maximum open instance for device reached
538 538 */
539 539 mutex_exit(&di_lock);
540 540 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
541 541 return (EAGAIN);
542 542 }
543 543 mutex_exit(&di_lock);
544 544
545 545 ASSERT(m < di_max_opens);
546 546 *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
547 547
548 548 dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
549 549 (void *)curthread, m + DI_NODE_SPECIES));
550 550
551 551 return (0);
552 552 }
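
Worked example of the open scheme: with DI_NODE_SPECIES == 2, opens of the full node (original minor 0) claim slots m = 0, 2, 4, ..., while opens of the read-only node (original minor 1) claim slots m = 1, 3, 5, ...; the dev_t handed back carries minor m + DI_NODE_SPECIES, which di_close() and di_ioctl() map back to a slot index by subtracting DI_NODE_SPECIES.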
553 553
554 554 /*ARGSUSED*/
555 555 static int
556 556 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
557 557 {
558 558 struct di_state *st;
559 559 int m = (int)getminor(dev) - DI_NODE_SPECIES;
560 560
561 - if (m < 0) {
561 + if (m < 0 || m >= di_max_opens) {
562 562 cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
563 563 m + DI_NODE_SPECIES);
564 564 return (ENXIO);
565 565 }
566 566
567 - st = di_states[m];
568 - ASSERT(m < di_max_opens && st != NULL);
569 -
570 - di_freemem(st);
571 - kmem_free(st, sizeof (struct di_state));
572 -
573 567 /*
574 568 * empty slot in state table
575 569 */
576 570 mutex_enter(&di_lock);
571 + st = di_states[m];
577 572 di_states[m] = NULL;
573 + mutex_exit(&di_lock);
574 +
575 + if (st != NULL) {
576 + di_freemem(st);
577 + kmem_free(st, sizeof (struct di_state));
578 + }
579 +
578 580 dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
579 581 (void *)curthread, m + DI_NODE_SPECIES));
580 - mutex_exit(&di_lock);
581 582
582 583 return (0);
583 584 }
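
For readers outside the kernel, here is a minimal userland sketch of the claim-under-lock pattern that the new di_close() adopts, using pthreads in place of kmutex_t; slot_tab, slot_lock, slot_close and MAX_OPENS are illustrative names, not part of the driver:

#include <pthread.h>
#include <stdlib.h>

#define	MAX_OPENS	32

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot_tab[MAX_OPENS];

int
slot_close(int m)
{
	void *st;

	if (m < 0 || m >= MAX_OPENS)	/* reject bad indexes first */
		return (-1);

	pthread_mutex_lock(&slot_lock);
	st = slot_tab[m];		/* claim the slot... */
	slot_tab[m] = NULL;		/* ...while holding the lock */
	pthread_mutex_unlock(&slot_lock);

	free(st);			/* free(NULL) is a harmless no-op */
	return (0);
}

Because the table entry is cleared before the memory is freed, a concurrent lookup done under slot_lock sees either the live pointer or NULL, never a dangling one.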
584 585
585 586
586 587 /*ARGSUSED*/
587 588 static int
588 589 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
589 590 {
590 591 int rv, error;
591 592 di_off_t off;
592 593 struct di_all *all;
593 594 struct di_state *st;
594 595 int m = (int)getminor(dev) - DI_NODE_SPECIES;
595 596 major_t i;
596 597 char *drv_name;
597 598 size_t map_size, size;
598 599 struct di_mem *dcp;
599 600 int ndi_flags;
600 601
601 602 if (m < 0 || m >= di_max_opens) {
602 603 return (ENXIO);
603 604 }
604 605
605 606 st = di_states[m];
606 - ASSERT(st != NULL);
607 + if (st == NULL) {
608 + return (ENXIO);
609 + }
607 610
608 611 dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
609 612
610 613 switch (cmd) {
611 614 case DINFOIDENT:
612 615 /*
613 616 * This is called from di_init to verify that the driver
614 617 * opened is indeed devinfo. The purpose is to guard against
615 618 * sending ioctl to an unknown driver in case of an
616 619 * unresolved major number conflict during bfu.
617 620 */
618 621 *rvalp = DI_MAGIC;
619 622 return (0);
620 623
621 624 case DINFOLODRV:
622 625 /*
623 626 * Hold an installed driver and return the result
624 627 */
625 628 if (DI_UNPRIVILEGED_NODE(m)) {
626 629 /*
627 630 * Only the fully enabled instances may issue
628 631 * DINFOLODRV.
629 632 */
630 633 return (EACCES);
631 634 }
632 635
633 636 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
634 637 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
635 638 kmem_free(drv_name, MAXNAMELEN);
636 639 return (EFAULT);
637 640 }
638 641
639 642 /*
640 643 * Some 3rd party drivers' _init() walks the device tree,
641 644 * so we load the driver module before configuring the driver.
642 645 */
643 646 i = ddi_name_to_major(drv_name);
644 647 if (ddi_hold_driver(i) == NULL) {
645 648 kmem_free(drv_name, MAXNAMELEN);
646 649 return (ENXIO);
647 650 }
648 651
649 652 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
650 653
651 654 /*
652 655 * i_ddi_load_drvconf() below will trigger a reprobe
653 656 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
654 657 * needed here.
655 658 */
656 659 modunload_disable();
657 660 (void) i_ddi_load_drvconf(i);
658 661 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
659 662 kmem_free(drv_name, MAXNAMELEN);
660 663 ddi_rele_driver(i);
661 664 rv = i_ddi_devs_attached(i);
662 665 modunload_enable();
663 666
664 667 i_ddi_di_cache_invalidate();
665 668
666 669 return ((rv == DDI_SUCCESS)? 0 : ENXIO);
667 670
668 671 case DINFOUSRLD:
669 672 /*
670 673 * The case for copying snapshot to userland
671 674 */
672 675 if (di_setstate(st, IOC_COPY) == -1)
673 676 return (EBUSY);
674 677
675 678 map_size = DI_ALL_PTR(st)->map_size;
676 679 if (map_size == 0) {
677 680 (void) di_setstate(st, IOC_DONE);
678 681 return (EFAULT);
679 682 }
680 683
681 684 /*
682 685 * copyout the snapshot
683 686 */
684 687 map_size = (map_size + PAGEOFFSET) & PAGEMASK;
685 688
686 689 /*
687 690 * Return the map size, so caller may do a sanity
688 691 * check against the return value of snapshot ioctl()
689 692 */
690 693 *rvalp = (int)map_size;
691 694
692 695 /*
693 696 * Copy one chunk at a time
694 697 */
695 698 off = 0;
696 699 dcp = st->memlist;
697 700 while (map_size) {
698 701 size = dcp->buf_size;
699 702 if (map_size <= size) {
700 703 size = map_size;
701 704 }
702 705
703 706 if (ddi_copyout(di_mem_addr(st, off),
704 707 (void *)(arg + off), size, mode) != 0) {
705 708 (void) di_setstate(st, IOC_DONE);
706 709 return (EFAULT);
707 710 }
708 711
709 712 map_size -= size;
710 713 off += size;
711 714 dcp = dcp->next;
712 715 }
713 716
714 717 di_freemem(st);
715 718 (void) di_setstate(st, IOC_IDLE);
716 719 return (0);
717 720
718 721 default:
719 722 if ((cmd & ~DIIOC_MASK) != DIIOC) {
720 723 /*
721 724 * Invalid ioctl command
722 725 */
723 726 return (ENOTTY);
724 727 }
725 728 /*
726 729 * take a snapshot
727 730 */
728 731 st->command = cmd & DIIOC_MASK;
729 732 /*FALLTHROUGH*/
730 733 }
731 734
732 735 /*
733 736 * Obtain enough memory to hold header + rootpath. We prevent kernel
734 737 * memory exhaustion by freeing any previously allocated snapshot and
735 738 * refusing the operation; otherwise we would be allowing ioctl(),
736 739 * ioctl(), ioctl(), ..., panic.
737 740 */
738 741 if (di_setstate(st, IOC_SNAP) == -1)
739 742 return (EBUSY);
740 743
741 744 /*
742 745 * Initial memlist always holds di_all and the root_path - and
743 746 * is at least a page in size.
744 747 */
745 748 size = sizeof (struct di_all) +
746 749 sizeof (((struct dinfo_io *)(NULL))->root_path);
747 750 if (size < PAGESIZE)
748 751 size = PAGESIZE;
749 752 off = di_checkmem(st, 0, size);
750 753 all = DI_ALL_PTR(st);
751 754 off += sizeof (struct di_all); /* real length of di_all */
752 755
753 756 all->devcnt = devcnt;
754 757 all->command = st->command;
755 758 all->version = DI_SNAPSHOT_VERSION;
756 759 all->top_vhci_devinfo = 0; /* filled by build_vhci_list. */
757 760
758 761 /*
759 762 * Note the endianness in case we need to transport snapshot
760 763 * over the network.
761 764 */
762 765 #if defined(_LITTLE_ENDIAN)
763 766 all->endianness = DI_LITTLE_ENDIAN;
764 767 #else
765 768 all->endianness = DI_BIG_ENDIAN;
766 769 #endif
767 770
768 771 /* Copyin ioctl args, store in the snapshot. */
769 772 if (copyinstr((void *)arg, all->req_path,
770 773 sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
771 774 di_freemem(st);
772 775 (void) di_setstate(st, IOC_IDLE);
773 776 return (EFAULT);
774 777 }
775 778 (void) strcpy(all->root_path, all->req_path);
776 779 off += size; /* real length of root_path */
777 780
778 781 if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
779 782 di_freemem(st);
780 783 (void) di_setstate(st, IOC_IDLE);
781 784 return (EINVAL);
782 785 }
783 786
784 787 error = 0;
785 788 if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
786 789 di_freemem(st);
787 790 (void) di_setstate(st, IOC_IDLE);
788 791 return (error);
789 792 }
790 793
791 794 /*
792 795 * Only the fully enabled version may force load drivers or read
793 796 * the parent private data from a driver.
794 797 */
795 798 if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
796 799 DI_UNPRIVILEGED_NODE(m)) {
797 800 di_freemem(st);
798 801 (void) di_setstate(st, IOC_IDLE);
799 802 return (EACCES);
800 803 }
801 804
802 805 /* Do we need private data? */
803 806 if (st->command & DINFOPRIVDATA) {
804 807 arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
805 808
806 809 #ifdef _MULTI_DATAMODEL
807 810 switch (ddi_model_convert_from(mode & FMODELS)) {
808 811 case DDI_MODEL_ILP32: {
809 812 /*
810 813 * Cannot copy private data from 64-bit kernel
811 814 * to 32-bit app
812 815 */
813 816 di_freemem(st);
814 817 (void) di_setstate(st, IOC_IDLE);
815 818 return (EINVAL);
816 819 }
817 820 case DDI_MODEL_NONE:
818 821 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
819 822 di_freemem(st);
820 823 (void) di_setstate(st, IOC_IDLE);
821 824 return (EFAULT);
822 825 }
823 826 break;
824 827 }
825 828 #else /* !_MULTI_DATAMODEL */
826 829 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
827 830 di_freemem(st);
828 831 (void) di_setstate(st, IOC_IDLE);
829 832 return (EFAULT);
830 833 }
831 834 #endif /* _MULTI_DATAMODEL */
832 835 }
833 836
834 837 all->top_devinfo = DI_ALIGN(off);
835 838
836 839 /*
837 840 * For cache lookups we reallocate memory from scratch,
838 841 * so the value of "all" is no longer valid.
839 842 */
840 843 all = NULL;
841 844
842 845 if (st->command & DINFOCACHE) {
843 846 *rvalp = di_cache_lookup(st);
844 847 } else if (snapshot_is_cacheable(st)) {
845 848 DI_CACHE_LOCK(di_cache);
846 849 *rvalp = di_cache_update(st);
847 850 DI_CACHE_UNLOCK(di_cache);
848 851 } else
849 852 *rvalp = di_snapshot_and_clean(st);
850 853
851 854 if (*rvalp) {
852 855 DI_ALL_PTR(st)->map_size = *rvalp;
853 856 (void) di_setstate(st, IOC_DONE);
854 857 } else {
855 858 di_freemem(st);
856 859 (void) di_setstate(st, IOC_IDLE);
857 860 }
858 861
859 862 return (0);
860 863 }
861 864
862 865 /*
863 866 * Get a chunk of memory >= size, for the snapshot
864 867 */
865 868 static void
866 869 di_allocmem(struct di_state *st, size_t size)
867 870 {
868 871 struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
869 872
870 873 /*
871 874 * Round up size to nearest power of 2. If it is less
872 875 * than st->mem_size, set it to st->mem_size (i.e.,
873 876 * the mem_size is doubled every time) to reduce the
874 877 * number of memory allocations.
875 878 */
876 879 size_t tmp = 1;
877 880 while (tmp < size) {
878 881 tmp <<= 1;
879 882 }
880 883 size = (tmp > st->mem_size) ? tmp : st->mem_size;
881 884
882 885 mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
883 886 mem->buf_size = size;
884 887
885 888 dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
886 889
887 890 if (st->mem_size == 0) { /* first chunk */
888 891 st->memlist = mem;
889 892 } else {
890 893 /*
891 894 * locate end of linked list and add a chunk at the end
892 895 */
893 896 struct di_mem *dcp = st->memlist;
894 897 while (dcp->next != NULL) {
895 898 dcp = dcp->next;
896 899 }
897 900
898 901 dcp->next = mem;
899 902 }
900 903
901 904 st->mem_size += size;
902 905 }
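
Worked example of the growth policy: a first request for three pages allocates a four-page chunk (rounded up to a power of two), and any later request, however small, allocates at least st->mem_size more, so total capacity roughly doubles with each chunk and the memlist stays short.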
903 906
904 907 /*
905 908 * Copy up to bufsiz bytes of the memlist to buf
906 909 */
907 910 static void
908 911 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
909 912 {
910 913 struct di_mem *dcp;
911 914 size_t copysz;
912 915
913 916 if (st->mem_size == 0) {
914 917 ASSERT(st->memlist == NULL);
915 918 return;
916 919 }
917 920
918 921 copysz = 0;
919 922 for (dcp = st->memlist; dcp; dcp = dcp->next) {
920 923
921 924 ASSERT(bufsiz > 0);
922 925
923 926 if (bufsiz <= dcp->buf_size)
924 927 copysz = bufsiz;
925 928 else
926 929 copysz = dcp->buf_size;
927 930
928 931 bcopy(dcp->buf, buf, copysz);
929 932
930 933 buf += copysz;
931 934 bufsiz -= copysz;
932 935
933 936 if (bufsiz == 0)
934 937 break;
935 938 }
936 939 }
937 940
938 941 /*
939 942 * Free all memory for the snapshot
940 943 */
941 944 static void
942 945 di_freemem(struct di_state *st)
943 946 {
944 947 struct di_mem *dcp, *tmp;
945 948
946 949 dcmn_err2((CE_CONT, "di_freemem\n"));
947 950
948 951 if (st->mem_size) {
949 952 dcp = st->memlist;
950 953 while (dcp) { /* traverse the linked list */
951 954 tmp = dcp;
952 955 dcp = dcp->next;
953 956 ddi_umem_free(tmp->cook);
954 957 kmem_free(tmp, sizeof (struct di_mem));
955 958 }
956 959 st->mem_size = 0;
957 960 st->memlist = NULL;
958 961 }
959 962
960 963 ASSERT(st->mem_size == 0);
961 964 ASSERT(st->memlist == NULL);
962 965 }
963 966
964 967 /*
965 968 * Copies cached data to the di_state structure.
966 969 * Returns:
967 970 * - size of data copied, on SUCCESS
968 971 * - 0 on failure
969 972 */
970 973 static int
971 974 di_cache2mem(struct di_cache *cache, struct di_state *st)
972 975 {
973 976 caddr_t pa;
974 977
975 978 ASSERT(st->mem_size == 0);
976 979 ASSERT(st->memlist == NULL);
977 980 ASSERT(!servicing_interrupt());
978 981 ASSERT(DI_CACHE_LOCKED(*cache));
979 982
980 983 if (cache->cache_size == 0) {
981 984 ASSERT(cache->cache_data == NULL);
982 985 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
983 986 return (0);
984 987 }
985 988
986 989 ASSERT(cache->cache_data);
987 990
988 991 di_allocmem(st, cache->cache_size);
989 992
990 993 pa = di_mem_addr(st, 0);
991 994
992 995 ASSERT(pa);
993 996
994 997 /*
995 998 * Verify that di_allocmem() allocates contiguous memory,
996 999 * so that it is safe to do straight bcopy()
997 1000 */
998 1001 ASSERT(st->memlist != NULL);
999 1002 ASSERT(st->memlist->next == NULL);
1000 1003 bcopy(cache->cache_data, pa, cache->cache_size);
1001 1004
1002 1005 return (cache->cache_size);
1003 1006 }
1004 1007
1005 1008 /*
1006 1009 * Copies a snapshot from di_state to the cache
1007 1010 * Returns:
1008 1011 * - 0 on failure
1009 1012 * - size of copied data on success
1010 1013 */
1011 1014 static size_t
1012 1015 di_mem2cache(struct di_state *st, struct di_cache *cache)
1013 1016 {
1014 1017 size_t map_size;
1015 1018
1016 1019 ASSERT(cache->cache_size == 0);
1017 1020 ASSERT(cache->cache_data == NULL);
1018 1021 ASSERT(!servicing_interrupt());
1019 1022 ASSERT(DI_CACHE_LOCKED(*cache));
1020 1023
1021 1024 if (st->mem_size == 0) {
1022 1025 ASSERT(st->memlist == NULL);
1023 1026 CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
1024 1027 return (0);
1025 1028 }
1026 1029
1027 1030 ASSERT(st->memlist);
1028 1031
1029 1032 /*
1030 1033 * The size of the memory list may be much larger than the
1031 1034 * size of valid data (map_size). Cache only the valid data
1032 1035 */
1033 1036 map_size = DI_ALL_PTR(st)->map_size;
1034 1037 if (map_size == 0 || map_size < sizeof (struct di_all) ||
1035 1038 map_size > st->mem_size) {
1036 1039 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
1037 1040 return (0);
1038 1041 }
1039 1042
1040 1043 cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
1041 1044 cache->cache_size = map_size;
1042 1045 di_copymem(st, cache->cache_data, cache->cache_size);
1043 1046
1044 1047 return (map_size);
1045 1048 }
1046 1049
1047 1050 /*
1048 1051 * Make sure there is at least "size" bytes memory left before
1049 1052 * going on. Otherwise, start on a new chunk.
1050 1053 */
1051 1054 static di_off_t
1052 1055 di_checkmem(struct di_state *st, di_off_t off, size_t size)
1053 1056 {
1054 1057 dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
1055 1058 off, (int)size));
1056 1059
1057 1060 /*
1058 1061 * di_checkmem() shouldn't be called with a size of zero.
1059 1062 * But in case it is, we want to make sure we return a valid
1060 1063 * offset within the memlist and not an offset that points us
1061 1064 * at the end of the memlist.
1062 1065 */
1063 1066 if (size == 0) {
1064 1067 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
1065 1068 size = 1;
1066 1069 }
1067 1070
1068 1071 off = DI_ALIGN(off);
1069 1072 if ((st->mem_size - off) < size) {
1070 1073 off = st->mem_size;
1071 1074 di_allocmem(st, size);
1072 1075 }
1073 1076
1074 1077 /* verify that return value is aligned */
1075 1078 ASSERT(off == DI_ALIGN(off));
1076 1079 return (off);
1077 1080 }
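
The canonical call pattern, used throughout this file (see di_copydevnm() below): off = di_checkmem(st, off, size); ... di_mem_addr(st, off) ...; off += size. Per the NOTE near the top of the file, callers store the aligned return value back into the offset they publish in the snapshot rather than padding their own allocation sizes.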
1078 1081
1079 1082 /*
1080 1083 * Copy the private data format from ioctl arg.
1081 1084 * On success, the ending offset is returned. On error 0 is returned.
1082 1085 */
1083 1086 static di_off_t
1084 1087 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
1085 1088 {
1086 1089 di_off_t size;
1087 1090 struct di_priv_data *priv;
1088 1091 struct di_all *all = DI_ALL_PTR(st);
1089 1092
1090 1093 dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
1091 1094 off, (void *)arg, mode));
1092 1095
1093 1096 /*
1094 1097 * Copyin data and check version.
1095 1098 * We only handle private data version 0.
1096 1099 */
1097 1100 priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
1098 1101 if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
1099 1102 mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
1100 1103 kmem_free(priv, sizeof (struct di_priv_data));
1101 1104 return (0);
1102 1105 }
1103 1106
1104 1107 /*
1105 1108 * Save di_priv_data copied from userland in snapshot.
1106 1109 */
1107 1110 all->pd_version = priv->version;
1108 1111 all->n_ppdata = priv->n_parent;
1109 1112 all->n_dpdata = priv->n_driver;
1110 1113
1111 1114 /*
1112 1115 * copyin private data format, modify offset accordingly
1113 1116 */
1114 1117 if (all->n_ppdata) { /* parent private data format */
1115 1118 /*
1116 1119 * check memory
1117 1120 */
1118 1121 size = all->n_ppdata * sizeof (struct di_priv_format);
1119 1122 all->ppdata_format = off = di_checkmem(st, off, size);
1120 1123 if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
1121 1124 mode) != 0) {
1122 1125 kmem_free(priv, sizeof (struct di_priv_data));
1123 1126 return (0);
1124 1127 }
1125 1128
1126 1129 off += size;
1127 1130 }
1128 1131
1129 1132 if (all->n_dpdata) { /* driver private data format */
1130 1133 /*
1131 1134 * check memory
1132 1135 */
1133 1136 size = all->n_dpdata * sizeof (struct di_priv_format);
1134 1137 all->dpdata_format = off = di_checkmem(st, off, size);
1135 1138 if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
1136 1139 mode) != 0) {
1137 1140 kmem_free(priv, sizeof (struct di_priv_data));
1138 1141 return (0);
1139 1142 }
1140 1143
1141 1144 off += size;
1142 1145 }
1143 1146
1144 1147 kmem_free(priv, sizeof (struct di_priv_data));
1145 1148 return (off);
1146 1149 }
1147 1150
1148 1151 /*
1149 1152 * Return the real address based on the offset (off) within snapshot
1150 1153 */
1151 1154 static void *
1152 1155 di_mem_addr(struct di_state *st, di_off_t off)
1153 1156 {
1154 1157 struct di_mem *dcp = st->memlist;
1155 1158
1156 1159 dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
1157 1160 (void *)dcp, off));
1158 1161
1159 1162 ASSERT(off < st->mem_size);
1160 1163
1161 1164 while (off >= dcp->buf_size) {
1162 1165 off -= dcp->buf_size;
1163 1166 dcp = dcp->next;
1164 1167 }
1165 1168
1166 1169 dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
1167 1170 off, (void *)(dcp->buf + off)));
1168 1171
1169 1172 return (dcp->buf + off);
1170 1173 }
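
Note that di_mem_addr() trusts st->memlist unconditionally; called on a bogus di_state_t, dcp is garbage and the walk faults. That is the panic in this bug's synopsis, and why di_ioctl() now fails fast with ENXIO instead of relying on a DEBUG-only ASSERT.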
1171 1174
1172 1175 /*
1173 1176 * Ideally we would use the whole key to derive the hash
1174 1177 * value. However, the probability that two keys will
1175 1178 * have the same dip (or pip) is very low, so
1176 1179 * hashing by dip (or pip) pointer should suffice.
1177 1180 */
1178 1181 static uint_t
1179 1182 di_hash_byptr(void *arg, mod_hash_key_t key)
1180 1183 {
1181 1184 struct di_key *dik = key;
1182 1185 size_t rshift;
1183 1186 void *ptr;
1184 1187
1185 1188 ASSERT(arg == NULL);
1186 1189
1187 1190 switch (dik->k_type) {
1188 1191 case DI_DKEY:
1189 1192 ptr = dik->k_u.dkey.dk_dip;
1190 1193 rshift = highbit(sizeof (struct dev_info));
1191 1194 break;
1192 1195 case DI_PKEY:
1193 1196 ptr = dik->k_u.pkey.pk_pip;
1194 1197 rshift = highbit(sizeof (struct mdi_pathinfo));
1195 1198 break;
1196 1199 default:
1197 1200 panic("devinfo: unknown key type");
1198 1201 /*NOTREACHED*/
1199 1202 }
1200 1203 return (mod_hash_byptr((void *)rshift, ptr));
1201 1204 }
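
A note on the shift count: illumos's mod_hash_byptr() uses its argument as a right-shift applied to the pointer before bucketing, so shifting by highbit(sizeof (struct dev_info)) discards the low-order bits, which carry little entropy among equally sized kmem allocations, and lets the higher bits select the bucket.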
1202 1205
1203 1206 static void
1204 1207 di_key_dtor(mod_hash_key_t key)
1205 1208 {
1206 1209 char *path_addr;
1207 1210 struct di_key *dik = key;
1208 1211
1209 1212 switch (dik->k_type) {
1210 1213 case DI_DKEY:
1211 1214 break;
1212 1215 case DI_PKEY:
1213 1216 path_addr = dik->k_u.pkey.pk_path_addr;
1214 1217 if (path_addr)
1215 1218 kmem_free(path_addr, strlen(path_addr) + 1);
1216 1219 break;
1217 1220 default:
1218 1221 panic("devinfo: unknown key type");
1219 1222 /*NOTREACHED*/
1220 1223 }
1221 1224
1222 1225 kmem_free(dik, sizeof (struct di_key));
1223 1226 }
1224 1227
1225 1228 static int
1226 1229 di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
1227 1230 {
1228 1231 if (dk1->dk_dip != dk2->dk_dip)
1229 1232 return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);
1230 1233
1231 1234 if (dk1->dk_major != DDI_MAJOR_T_NONE &&
1232 1235 dk2->dk_major != DDI_MAJOR_T_NONE) {
1233 1236 if (dk1->dk_major != dk2->dk_major)
1234 1237 return (dk1->dk_major > dk2->dk_major ? 1 : -1);
1235 1238
1236 1239 if (dk1->dk_inst != dk2->dk_inst)
1237 1240 return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
1238 1241 }
1239 1242
1240 1243 if (dk1->dk_nodeid != dk2->dk_nodeid)
1241 1244 return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
1242 1245
1243 1246 return (0);
1244 1247 }
1245 1248
1246 1249 static int
1247 1250 di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
1248 1251 {
1249 1252 char *p1, *p2;
1250 1253 int rv;
1251 1254
1252 1255 if (pk1->pk_pip != pk2->pk_pip)
1253 1256 return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);
1254 1257
1255 1258 p1 = pk1->pk_path_addr;
1256 1259 p2 = pk2->pk_path_addr;
1257 1260
1258 1261 p1 = p1 ? p1 : "";
1259 1262 p2 = p2 ? p2 : "";
1260 1263
1261 1264 rv = strcmp(p1, p2);
1262 1265 if (rv)
1263 1266 return (rv > 0 ? 1 : -1);
1264 1267
1265 1268 if (pk1->pk_client != pk2->pk_client)
1266 1269 return (pk1->pk_client > pk2->pk_client ? 1 : -1);
1267 1270
1268 1271 if (pk1->pk_phci != pk2->pk_phci)
1269 1272 return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);
1270 1273
1271 1274 return (0);
1272 1275 }
1273 1276
1274 1277 static int
1275 1278 di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
1276 1279 {
1277 1280 struct di_key *dik1, *dik2;
1278 1281
1279 1282 dik1 = key1;
1280 1283 dik2 = key2;
1281 1284
1282 1285 if (dik1->k_type != dik2->k_type) {
1283 1286 panic("devinfo: mismatched keys");
1284 1287 /*NOTREACHED*/
1285 1288 }
1286 1289
1287 1290 switch (dik1->k_type) {
1288 1291 case DI_DKEY:
1289 1292 return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
1290 1293 case DI_PKEY:
1291 1294 return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
1292 1295 default:
1293 1296 panic("devinfo: unknown key type");
1294 1297 /*NOTREACHED*/
1295 1298 }
1296 1299 }
1297 1300
1298 1301 static void
1299 1302 di_copy_aliases(struct di_state *st, alias_pair_t *apair, di_off_t *offp)
1300 1303 {
1301 1304 di_off_t off;
1302 1305 struct di_all *all = DI_ALL_PTR(st);
1303 1306 struct di_alias *di_alias;
1304 1307 di_off_t curroff;
1305 1308 dev_info_t *currdip;
1306 1309 size_t size;
1307 1310
1308 1311 currdip = NULL;
1309 1312 if (resolve_pathname(apair->pair_alias, &currdip, NULL, NULL) != 0) {
1310 1313 return;
1311 1314 }
1312 1315
1313 1316 if (di_dip_find(st, currdip, &curroff) != 0) {
1314 1317 ndi_rele_devi(currdip);
1315 1318 return;
1316 1319 }
1317 1320 ndi_rele_devi(currdip);
1318 1321
1319 1322 off = *offp;
1320 1323 size = sizeof (struct di_alias);
1321 1324 size += strlen(apair->pair_alias) + 1;
1322 1325 off = di_checkmem(st, off, size);
1323 1326 di_alias = DI_ALIAS(di_mem_addr(st, off));
1324 1327
1325 1328 di_alias->self = off;
1326 1329 di_alias->next = all->aliases;
1327 1330 all->aliases = off;
1328 1331 (void) strcpy(di_alias->alias, apair->pair_alias);
1329 1332 di_alias->curroff = curroff;
1330 1333
1331 1334 off += size;
1332 1335
1333 1336 *offp = off;
1334 1337 }
1335 1338
1336 1339 /*
1337 1340 * This is the main function that takes a snapshot
1338 1341 */
1339 1342 static di_off_t
1340 1343 di_snapshot(struct di_state *st)
1341 1344 {
1342 1345 di_off_t off;
1343 1346 struct di_all *all;
1344 1347 dev_info_t *rootnode;
1345 1348 char buf[80];
1346 1349 int plen;
1347 1350 char *path;
1348 1351 vnode_t *vp;
1349 1352 int i;
1350 1353
1351 1354 all = DI_ALL_PTR(st);
1352 1355 dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));
1353 1356
1354 1357 /*
1355 1358 * Translate requested root path if an alias and snap-root != "/"
1356 1359 */
1357 1360 if (ddi_aliases_present == B_TRUE && strcmp(all->root_path, "/") != 0) {
1358 1361 /* If there is no redirected alias, use root_path as is */
1359 1362 rootnode = ddi_alias_redirect(all->root_path);
1360 1363 if (rootnode) {
1361 1364 (void) ddi_pathname(rootnode, all->root_path);
1362 1365 goto got_root;
1363 1366 }
1364 1367 }
1365 1368
1366 1369 /*
1367 1370 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
1368 1371 * some platforms have OBP bugs where executing the NDI_PROMNAME code
1369 1372 * path against an invalid path results in panic. The lookupnameat
1370 1373 * is done relative to rootdir without a leading '/' on "devices/"
1371 1374 * to force the lookup to occur in the global zone.
1372 1375 */
1373 1376 plen = strlen("devices/") + strlen(all->root_path) + 1;
1374 1377 path = kmem_alloc(plen, KM_SLEEP);
1375 1378 (void) snprintf(path, plen, "devices/%s", all->root_path);
1376 1379 if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
1377 1380 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1378 1381 all->root_path));
1379 1382 kmem_free(path, plen);
1380 1383 return (0);
1381 1384 }
1382 1385 kmem_free(path, plen);
1383 1386 VN_RELE(vp);
1384 1387
1385 1388 /*
1386 1389 * Hold the devinfo node referred by the path.
1387 1390 */
1388 1391 rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
1389 1392 if (rootnode == NULL) {
1390 1393 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1391 1394 all->root_path));
1392 1395 return (0);
1393 1396 }
1394 1397
1395 1398 got_root:
1396 1399 (void) snprintf(buf, sizeof (buf),
1397 1400 "devinfo registered dips (statep=%p)", (void *)st);
1398 1401
1399 1402 st->reg_dip_hash = mod_hash_create_extended(buf, 64,
1400 1403 di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1401 1404 NULL, di_key_cmp, KM_SLEEP);
1402 1405
1403 1406
1404 1407 (void) snprintf(buf, sizeof (buf),
1405 1408 "devinfo registered pips (statep=%p)", (void *)st);
1406 1409
1407 1410 st->reg_pip_hash = mod_hash_create_extended(buf, 64,
1408 1411 di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1409 1412 NULL, di_key_cmp, KM_SLEEP);
1410 1413
1411 1414 if (DINFOHP & st->command) {
1412 1415 list_create(&st->hp_list, sizeof (i_hp_t),
1413 1416 offsetof(i_hp_t, hp_link));
1414 1417 }
1415 1418
1416 1419 /*
1417 1420 * copy the device tree
1418 1421 */
1419 1422 off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);
1420 1423
1421 1424 if (DINFOPATH & st->command) {
1422 1425 mdi_walk_vhcis(build_vhci_list, st);
1423 1426 }
1424 1427
1425 1428 if (DINFOHP & st->command) {
1426 1429 di_hotplug_children(st);
1427 1430 }
1428 1431
1429 1432 ddi_release_devi(rootnode);
1430 1433
1431 1434 /*
1432 1435 * copy the devnames array
1433 1436 */
1434 1437 all->devnames = off;
1435 1438 off = di_copydevnm(&all->devnames, st);
1436 1439
1437 1440
1438 1441 /* initialize the hash tables */
1439 1442 st->lnode_count = 0;
1440 1443 st->link_count = 0;
1441 1444
1442 1445 if (DINFOLYR & st->command) {
1443 1446 off = di_getlink_data(off, st);
1444 1447 }
1445 1448
1446 1449 all->aliases = 0;
1447 1450 if (ddi_aliases_present == B_FALSE)
1448 1451 goto done;
1449 1452
1450 1453 for (i = 0; i < ddi_aliases.dali_num_pairs; i++) {
1451 1454 di_copy_aliases(st, &(ddi_aliases.dali_alias_pairs[i]), &off);
1452 1455 }
1453 1456
1454 1457 done:
1455 1458 /*
1456 1459 * Free up hash tables
1457 1460 */
1458 1461 mod_hash_destroy_hash(st->reg_dip_hash);
1459 1462 mod_hash_destroy_hash(st->reg_pip_hash);
1460 1463
1461 1464 /*
1462 1465 * Record the timestamp now that we are done with snapshot.
1463 1466 *
1464 1467 * We compute the checksum later, and only if we cache
1465 1468 * the snapshot, since checksumming adds some overhead.
1466 1469 * The checksum is checked later if we read the cache file
1467 1470 * from disk.
1468 1471 *
1469 1472 * Set checksum field to 0 as CRC is calculated with that
1470 1473 * field set to 0.
1471 1474 */
1472 1475 all->snapshot_time = ddi_get_time();
1473 1476 all->cache_checksum = 0;
1474 1477
1475 1478 ASSERT(all->snapshot_time != 0);
1476 1479
1477 1480 return (off);
1478 1481 }
1479 1482
1480 1483 /*
1481 1484 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
1482 1485 */
1483 1486 static di_off_t
1484 1487 di_snapshot_and_clean(struct di_state *st)
1485 1488 {
1486 1489 di_off_t off;
1487 1490
1488 1491 modunload_disable();
1489 1492 off = di_snapshot(st);
1490 1493 if (off != 0 && (st->command & DINFOCLEANUP)) {
1491 1494 ASSERT(DEVICES_FILES_CLEANABLE(st));
1492 1495 /*
1493 1496 * Cleanup /etc/devices files:
1494 1497 * In order to accurately account for the system configuration
1495 1498 * in /etc/devices files, the appropriate drivers must be
1496 1499 * fully configured before the cleanup starts.
1497 1500 * So enable modunload only after the cleanup.
1498 1501 */
1499 1502 i_ddi_clean_devices_files();
1500 1503 /*
1501 1504 * Remove backing store nodes for unused devices,
1502 1505 * which retain past permissions customizations
1503 1506 * and may be undesired for newly configured devices.
1504 1507 */
1505 1508 dev_devices_cleanup();
1506 1509 }
1507 1510 modunload_enable();
1508 1511
1509 1512 return (off);
1510 1513 }
1511 1514
1512 1515 /*
1513 1516 * construct vhci linkage in the snapshot.
1514 1517 */
1515 1518 static int
1516 1519 build_vhci_list(dev_info_t *vh_devinfo, void *arg)
1517 1520 {
1518 1521 struct di_all *all;
1519 1522 struct di_node *me;
1520 1523 struct di_state *st;
1521 1524 di_off_t off;
1522 1525 phci_walk_arg_t pwa;
1523 1526
1524 1527 dcmn_err3((CE_CONT, "build_vhci list\n"));
1525 1528
1526 1529 dcmn_err3((CE_CONT, "vhci node %s%d\n",
1527 1530 ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));
1528 1531
1529 1532 st = (struct di_state *)arg;
1530 1533 if (di_dip_find(st, vh_devinfo, &off) != 0) {
1531 1534 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1532 1535 return (DDI_WALK_TERMINATE);
1533 1536 }
1534 1537
1535 1538 dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
1536 1539 st->mem_size, off));
1537 1540
1538 1541 all = DI_ALL_PTR(st);
1539 1542 if (all->top_vhci_devinfo == 0) {
1540 1543 all->top_vhci_devinfo = off;
1541 1544 } else {
1542 1545 me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));
1543 1546
1544 1547 while (me->next_vhci != 0) {
1545 1548 me = DI_NODE(di_mem_addr(st, me->next_vhci));
1546 1549 }
1547 1550
1548 1551 me->next_vhci = off;
1549 1552 }
1550 1553
1551 1554 pwa.off = off;
1552 1555 pwa.st = st;
1553 1556 mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);
1554 1557
1555 1558 return (DDI_WALK_CONTINUE);
1556 1559 }
1557 1560
1558 1561 /*
1559 1562 * construct phci linkage for the given vhci in the snapshot.
1560 1563 */
1561 1564 static int
1562 1565 build_phci_list(dev_info_t *ph_devinfo, void *arg)
1563 1566 {
1564 1567 struct di_node *vh_di_node;
1565 1568 struct di_node *me;
1566 1569 phci_walk_arg_t *pwa;
1567 1570 di_off_t off;
1568 1571
1569 1572 pwa = (phci_walk_arg_t *)arg;
1570 1573
1571 1574 dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
1572 1575 pwa->off));
1573 1576
1574 1577 vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
1575 1578 if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
1576 1579 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1577 1580 return (DDI_WALK_TERMINATE);
1578 1581 }
1579 1582
1580 1583 dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
1581 1584 ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));
1582 1585
1583 1586 if (vh_di_node->top_phci == 0) {
1584 1587 vh_di_node->top_phci = off;
1585 1588 return (DDI_WALK_CONTINUE);
1586 1589 }
1587 1590
1588 1591 me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));
1589 1592
1590 1593 while (me->next_phci != 0) {
1591 1594 me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
1592 1595 }
1593 1596 me->next_phci = off;
1594 1597
1595 1598 return (DDI_WALK_CONTINUE);
1596 1599 }
1597 1600
1598 1601 /*
1599 1602 * Assumes all devinfo nodes in device tree have been snapshotted
1600 1603 */
1601 1604 static void
1602 1605 snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
1603 1606 {
1604 1607 struct dev_info *node;
1605 1608 struct di_node *me;
1606 1609 di_off_t off;
1607 1610
1608 1611 ASSERT(mutex_owned(&dnp->dn_lock));
1609 1612
1610 1613 node = DEVI(dnp->dn_head);
1611 1614 for (; node; node = node->devi_next) {
1612 1615 if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
1613 1616 continue;
1614 1617
1615 1618 ASSERT(off > 0);
1616 1619 me = DI_NODE(di_mem_addr(st, off));
1617 1620 ASSERT(me->next == 0 || me->next == -1);
1618 1621 /*
1619 1622 * Only nodes which were BOUND when they were
1620 1623 * snapshotted will be added to per-driver list.
1621 1624 */
1622 1625 if (me->next != -1)
1623 1626 continue;
1624 1627
1625 1628 *off_p = off;
1626 1629 off_p = &me->next;
1627 1630 }
1628 1631
1629 1632 *off_p = 0;
1630 1633 }
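
The walk above threads matching nodes into a singly linked list inside the snapshot: off_p is left aimed at the previous entry's next field after each link, and the chain is terminated with 0 once the devnames list is exhausted.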
1631 1634
1632 1635 /*
1633 1636 * Copy the devnames array, so we have a list of drivers in the snapshot.
1634 1637 * Also makes it possible to locate the per-driver devinfo nodes.
1635 1638 */
1636 1639 static di_off_t
1637 1640 di_copydevnm(di_off_t *off_p, struct di_state *st)
1638 1641 {
1639 1642 int i;
1640 1643 di_off_t off;
1641 1644 size_t size;
1642 1645 struct di_devnm *dnp;
1643 1646
1644 1647 dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));
1645 1648
1646 1649 /*
1647 1650 * make sure there is some allocated memory
1648 1651 */
1649 1652 size = devcnt * sizeof (struct di_devnm);
1650 1653 *off_p = off = di_checkmem(st, *off_p, size);
1651 1654 dnp = DI_DEVNM(di_mem_addr(st, off));
1652 1655 off += size;
1653 1656
1654 1657 dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
1655 1658 devcnt, off));
1656 1659
1657 1660 for (i = 0; i < devcnt; i++) {
1658 1661 if (devnamesp[i].dn_name == NULL) {
1659 1662 continue;
1660 1663 }
1661 1664
1662 1665 /*
1663 1666 * dn_name is not freed during driver unload or removal.
1664 1667 *
1665 1668 * There is a race condition when make_devname() changes
1666 1669 * dn_name during our strcpy. This should be rare since
1667 1670 * only add_drv does this. At any rate, we never had a
1668 1671 * problem with ddi_name_to_major(), which should have
1669 1672 * the same problem.
1670 1673 */
1671 1674 dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
1672 1675 devnamesp[i].dn_name, devnamesp[i].dn_instance, off));
1673 1676
1674 1677 size = strlen(devnamesp[i].dn_name) + 1;
1675 1678 dnp[i].name = off = di_checkmem(st, off, size);
1676 1679 (void) strcpy((char *)di_mem_addr(st, off),
1677 1680 devnamesp[i].dn_name);
1678 1681 off += size;
1679 1682
1680 1683 mutex_enter(&devnamesp[i].dn_lock);
1681 1684
1682 1685 /*
1683 1686 * Snapshot per-driver node list
1684 1687 */
1685 1688 snap_driver_list(st, &devnamesp[i], &dnp[i].head);
1686 1689
1687 1690 /*
1688 1691 * This is not used by libdevinfo, leave it for now
1689 1692 */
1690 1693 dnp[i].flags = devnamesp[i].dn_flags;
1691 1694 dnp[i].instance = devnamesp[i].dn_instance;
1692 1695
1693 1696 /*
1694 1697 * get global properties
1695 1698 */
1696 1699 if ((DINFOPROP & st->command) &&
1697 1700 devnamesp[i].dn_global_prop_ptr) {
1698 1701 dnp[i].global_prop = off;
1699 1702 off = di_getprop(DI_PROP_GLB_LIST,
1700 1703 &devnamesp[i].dn_global_prop_ptr->prop_list,
1701 1704 &dnp[i].global_prop, st, NULL);
1702 1705 }
1703 1706
1704 1707 /*
1705 1708 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
1706 1709 */
1707 1710 if (CB_DRV_INSTALLED(devopsp[i])) {
1708 1711 if (devopsp[i]->devo_cb_ops) {
1709 1712 dnp[i].ops |= DI_CB_OPS;
1710 1713 if (devopsp[i]->devo_cb_ops->cb_str)
1711 1714 dnp[i].ops |= DI_STREAM_OPS;
1712 1715 }
1713 1716 if (NEXUS_DRV(devopsp[i])) {
1714 1717 dnp[i].ops |= DI_BUS_OPS;
1715 1718 }
1716 1719 }
1717 1720
1718 1721 mutex_exit(&devnamesp[i].dn_lock);
1719 1722 }
1720 1723
1721 1724 dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));
1722 1725
1723 1726 return (off);
1724 1727 }
1725 1728
1726 1729 /*
1727 1730 * Copy the kernel devinfo tree. The tree and the devnames array forms
1728 1731 * the entire snapshot (see also di_copydevnm).
1729 1732 */
1730 1733 static di_off_t
1731 1734 di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
1732 1735 {
1733 1736 di_off_t off;
1734 1737 struct dev_info *node;
1735 1738 struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);
1736 1739
1737 1740 dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
1738 1741 (void *)root, *off_p));
1739 1742
1740 1743 /* force attach drivers */
1741 1744 if (i_ddi_devi_attached((dev_info_t *)root) &&
1742 1745 (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
1743 1746 (void) ndi_devi_config((dev_info_t *)root,
1744 1747 NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
1745 1748 NDI_DRV_CONF_REPROBE);
1746 1749 }
1747 1750
1748 1751 /*
1749 1752 * Push top_devinfo onto a stack
1750 1753 *
1751 1754 * The stack is necessary to avoid recursion, which can overrun
1752 1755 * the kernel stack.
1753 1756 */
1754 1757 PUSH_STACK(dsp, root, off_p);
1755 1758
1756 1759 /*
1757 1760 * As long as there is a node on the stack, copy the node.
1758 1761 * di_copynode() is responsible for pushing and popping
1759 1762 * child and sibling nodes on the stack.
1760 1763 */
1761 1764 while (!EMPTY_STACK(dsp)) {
1762 1765 node = TOP_NODE(dsp);
1763 1766 off = di_copynode(node, dsp, st);
1764 1767 }
1765 1768
1766 1769 /*
1767 1770 * Free the stack structure
1768 1771 */
1769 1772 kmem_free(dsp, sizeof (struct di_stack));
1770 1773
1771 1774 return (off);
1772 1775 }
1773 1776
1774 1777 /*
1775 1778 * This is the core function, which copies all data associated with a single
1776 1779 * node into the snapshot. The amount of information is determined by the
1777 1780 * ioctl command.
1778 1781 */
1779 1782 static di_off_t
1780 1783 di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
1781 1784 {
1782 1785 di_off_t off;
1783 1786 struct di_node *me;
1784 1787 size_t size;
1785 1788 struct dev_info *n;
1786 1789
1787 1790 dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
1788 1791 ASSERT((node != NULL) && (node == TOP_NODE(dsp)));
1789 1792
1790 1793 /*
1791 1794 * check memory usage, and fix offsets accordingly.
1792 1795 */
1793 1796 size = sizeof (struct di_node);
1794 1797 *(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
1795 1798 me = DI_NODE(di_mem_addr(st, off));
1796 1799 me->self = off;
1797 1800 off += size;
1798 1801
1799 1802 dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
1800 1803 node->devi_node_name, node->devi_instance, off));
1801 1804
1802 1805 /*
1803 1806 * Node parameters:
1804 1807 * self -- offset of current node within snapshot
1805 1808 * nodeid -- pointer to PROM node (tri-valued)
1806 1809 * state -- hot plugging device state
1807 1810 * node_state -- devinfo node state
1808 1811 */
1809 1812 me->instance = node->devi_instance;
1810 1813 me->nodeid = node->devi_nodeid;
1811 1814 me->node_class = node->devi_node_class;
1812 1815 me->attributes = node->devi_node_attributes;
1813 1816 me->state = node->devi_state;
1814 1817 me->flags = node->devi_flags;
1815 1818 me->node_state = node->devi_node_state;
1816 1819	me->next_vhci = 0;	/* Filled in by build_vhci_list. */
1817 1820	me->top_phci = 0;	/* Filled in by build_phci_list. */
1818 1821	me->next_phci = 0;	/* Filled in by build_phci_list. */
1819 1822 me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
1820 1823 me->user_private_data = NULL;
1821 1824
1822 1825 /*
1823 1826 * Get parent's offset in snapshot from the stack
1824 1827 * and store it in the current node
1825 1828 */
1826 1829 if (dsp->depth > 1) {
1827 1830 me->parent = *(PARENT_OFFSET(dsp));
1828 1831 }
1829 1832
1830 1833 /*
1831 1834 * Save the offset of this di_node in a hash table.
1832 1835 * This is used later to resolve references to this
1833 1836 * dip from other parts of the tree (per-driver list,
1834 1837 * multipathing linkages, layered usage linkages).
1835 1838 * The key used for the hash table is derived from
1836 1839 * information in the dip.
1837 1840 */
1838 1841 di_register_dip(st, (dev_info_t *)node, me->self);
1839 1842
1840 1843 #ifdef DEVID_COMPATIBILITY
1841 1844 /* check for devid as property marker */
1842 1845 if (node->devi_devid_str) {
1843 1846 ddi_devid_t devid;
1844 1847
1845 1848 /*
1846 1849 * The devid is now represented as a property. For
1847 1850 * compatibility with di_devid() interface in libdevinfo we
1848 1851 * must return it as a binary structure in the snapshot. When
1849 1852 * (if) di_devid() is removed from libdevinfo then the code
1850 1853 * related to DEVID_COMPATIBILITY can be removed.
1851 1854 */
1852 1855 if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
1853 1856 DDI_SUCCESS) {
1854 1857 size = ddi_devid_sizeof(devid);
1855 1858 off = di_checkmem(st, off, size);
1856 1859 me->devid = off;
1857 1860 bcopy(devid, di_mem_addr(st, off), size);
1858 1861 off += size;
1859 1862 ddi_devid_free(devid);
1860 1863 }
1861 1864 }
1862 1865 #endif /* DEVID_COMPATIBILITY */
1863 1866
1864 1867 if (node->devi_node_name) {
1865 1868 size = strlen(node->devi_node_name) + 1;
1866 1869 me->node_name = off = di_checkmem(st, off, size);
1867 1870 (void) strcpy(di_mem_addr(st, off), node->devi_node_name);
1868 1871 off += size;
1869 1872 }
1870 1873
1871 1874 if (node->devi_compat_names && (node->devi_compat_length > 1)) {
1872 1875 size = node->devi_compat_length;
1873 1876 me->compat_names = off = di_checkmem(st, off, size);
1874 1877 me->compat_length = (int)size;
1875 1878 bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
1876 1879 off += size;
1877 1880 }
1878 1881
1879 1882 if (node->devi_addr) {
1880 1883 size = strlen(node->devi_addr) + 1;
1881 1884 me->address = off = di_checkmem(st, off, size);
1882 1885 (void) strcpy(di_mem_addr(st, off), node->devi_addr);
1883 1886 off += size;
1884 1887 }
1885 1888
1886 1889 if (node->devi_binding_name) {
1887 1890 size = strlen(node->devi_binding_name) + 1;
1888 1891 me->bind_name = off = di_checkmem(st, off, size);
1889 1892 (void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
1890 1893 off += size;
1891 1894 }
1892 1895
1893 1896 me->drv_major = node->devi_major;
1894 1897
1895 1898 /*
1896 1899 * If the dip is BOUND, set the next pointer of the
1897 1900 * per-instance list to -1, indicating that it is yet to be resolved.
1898 1901 * This will be resolved later in snap_driver_list().
1899 1902 */
1900 1903 if (me->drv_major != -1) {
1901 1904 me->next = -1;
1902 1905 } else {
1903 1906 me->next = 0;
1904 1907 }
1905 1908
1906 1909 /*
1907 1910 * An optimization to skip mutex_enter when not needed.
1908 1911 */
1909 1912 if (!((DINFOMINOR | DINFOPROP | DINFOPATH | DINFOHP) & st->command)) {
1910 1913 goto priv_data;
1911 1914 }
1912 1915
1913 1916 /*
1914 1917 * LOCKING: We already have an active ndi_devi_enter to gather the
1915 1918 * minor data, and we will take devi_lock to gather properties as
1916 1919	 * needed in di_getprop().
1917 1920 */
1918 1921 if (!(DINFOMINOR & st->command)) {
1919 1922 goto path;
1920 1923 }
1921 1924
1922 1925 ASSERT(DEVI_BUSY_OWNED(node));
1923 1926 if (node->devi_minor) { /* minor data */
1924 1927 me->minor_data = off;
1925 1928 off = di_getmdata(node->devi_minor, &me->minor_data,
1926 1929 me->self, st);
1927 1930 }
1928 1931
1929 1932 path:
1930 1933 if (!(DINFOPATH & st->command)) {
1931 1934 goto property;
1932 1935 }
1933 1936
1934 1937 if (MDI_VHCI(node)) {
1935 1938 me->multipath_component = MULTIPATH_COMPONENT_VHCI;
1936 1939 }
1937 1940
1938 1941 if (MDI_CLIENT(node)) {
1939 1942 me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
1940 1943 me->multipath_client = off;
1941 1944 off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
1942 1945 me->self, st, 1);
1943 1946 dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
1944 1947 "component type = %d. off=%d",
1945 1948 me->multipath_client,
1946 1949 (void *)node, node->devi_mdi_component, off));
1947 1950 }
1948 1951
1949 1952 if (MDI_PHCI(node)) {
1950 1953 me->multipath_component = MULTIPATH_COMPONENT_PHCI;
1951 1954 me->multipath_phci = off;
1952 1955 off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
1953 1956 me->self, st, 0);
1954 1957 dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
1955 1958 "component type = %d. off=%d",
1956 1959 me->multipath_phci,
1957 1960 (void *)node, node->devi_mdi_component, off));
1958 1961 }
1959 1962
1960 1963 property:
1961 1964 if (!(DINFOPROP & st->command)) {
1962 1965 goto hotplug_data;
1963 1966 }
1964 1967
1965 1968 if (node->devi_drv_prop_ptr) { /* driver property list */
1966 1969 me->drv_prop = off;
1967 1970 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr,
1968 1971 &me->drv_prop, st, node);
1969 1972 }
1970 1973
1971 1974 if (node->devi_sys_prop_ptr) { /* system property list */
1972 1975 me->sys_prop = off;
1973 1976 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr,
1974 1977 &me->sys_prop, st, node);
1975 1978 }
1976 1979
1977 1980 if (node->devi_hw_prop_ptr) { /* hardware property list */
1978 1981 me->hw_prop = off;
1979 1982 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr,
1980 1983 &me->hw_prop, st, node);
1981 1984 }
1982 1985
1983 1986 if (node->devi_global_prop_list == NULL) {
1984 1987 me->glob_prop = (di_off_t)-1; /* not global property */
1985 1988 } else {
1986 1989 /*
1987 1990		 * Make a copy of the global property list if this devinfo refers
1988 1991		 * to global properties different from what's on the devnames
1989 1992 * array. It can happen if there has been a forced
1990 1993 * driver.conf update. See mod_drv(1M).
1991 1994 */
1992 1995 ASSERT(me->drv_major != -1);
1993 1996 if (node->devi_global_prop_list !=
1994 1997 devnamesp[me->drv_major].dn_global_prop_ptr) {
1995 1998 me->glob_prop = off;
1996 1999 off = di_getprop(DI_PROP_GLB_LIST,
1997 2000 &node->devi_global_prop_list->prop_list,
1998 2001 &me->glob_prop, st, node);
1999 2002 }
2000 2003 }
2001 2004
2002 2005 hotplug_data:
2003 2006 if (!(DINFOHP & st->command)) {
2004 2007 goto priv_data;
2005 2008 }
2006 2009
2007 2010 if (node->devi_hp_hdlp) { /* hotplug data */
2008 2011 me->hp_data = off;
2009 2012 off = di_gethpdata(node->devi_hp_hdlp, &me->hp_data, st);
2010 2013 }
2011 2014
2012 2015 priv_data:
2013 2016 if (!(DINFOPRIVDATA & st->command)) {
2014 2017 goto pm_info;
2015 2018 }
2016 2019
2017 2020 if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
2018 2021 me->parent_data = off;
2019 2022 off = di_getppdata(node, &me->parent_data, st);
2020 2023 }
2021 2024
2022 2025 if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
2023 2026 me->driver_data = off;
2024 2027 off = di_getdpdata(node, &me->driver_data, st);
2025 2028 }
2026 2029
2027 2030 pm_info: /* NOT implemented */
2028 2031
2029 2032 subtree:
2030 2033 /* keep the stack aligned */
2031 2034 off = DI_ALIGN(off);
2032 2035
2033 2036 if (!(DINFOSUBTREE & st->command)) {
2034 2037 POP_STACK(dsp);
2035 2038 return (off);
2036 2039 }
2037 2040
2038 2041 child:
2039 2042 /*
2040 2043 * If there is a visible child--push child onto stack.
2041 2044 * Hold the parent (me) busy while doing so.
2042 2045 */
2043 2046 if ((n = node->devi_child) != NULL) {
2044 2047 /* skip hidden nodes */
2045 2048 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2046 2049 n = n->devi_sibling;
2047 2050 if (n) {
2048 2051 me->child = off;
2049 2052 PUSH_STACK(dsp, n, &me->child);
2050 2053 return (me->child);
2051 2054 }
2052 2055 }
2053 2056
2054 2057 sibling:
2055 2058 /*
2056 2059	 * Done with any child nodes; unwind the stack until a visible
2057 2060	 * sibling of a parent node is found or the root node is reached.
2058 2061 */
2059 2062 POP_STACK(dsp);
2060 2063 while (!EMPTY_STACK(dsp)) {
2061 2064 if ((n = node->devi_sibling) != NULL) {
2062 2065 /* skip hidden nodes */
2063 2066 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2064 2067 n = n->devi_sibling;
2065 2068 if (n) {
2066 2069 me->sibling = DI_ALIGN(off);
2067 2070 PUSH_STACK(dsp, n, &me->sibling);
2068 2071 return (me->sibling);
2069 2072 }
2070 2073 }
2071 2074 node = TOP_NODE(dsp);
2072 2075 me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
2073 2076 POP_STACK(dsp);
2074 2077 }
2075 2078
2076 2079 /*
2077 2080 * DONE with all nodes
2078 2081 */
2079 2082 return (off);
2080 2083 }
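
Nearly every block in di_copynode() above follows one idiom: di_checkmem() returns an offset with at least size bytes of snapshot memory behind it, the record field is set to that offset, di_mem_addr() turns the offset into an address to write through, and off then advances past the data. A simplified user-space sketch of the bookkeeping, assuming a flat preallocated buffer (checkmem() here only aligns; the real driver can also grow the snapshot):

	#include <string.h>

	static char	snapbuf[4096];		/* stand-in for snapshot memory */

	/* Return an aligned offset with size bytes available (alignment only). */
	static size_t
	checkmem(size_t off, size_t size)
	{
		(void) size;			/* growth is elided in this sketch */
		return ((off + 7) & ~(size_t)7);
	}

	/* Copy a string into the buffer; record where it landed in *slot. */
	static size_t
	append_str(size_t off, size_t *slot, const char *s)
	{
		size_t	size = strlen(s) + 1;

		*slot = off = checkmem(off, size);
		(void) strcpy(snapbuf + off, s);
		return (off + size);		/* advance past the data */
	}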
2081 2084
2082 2085 static i_lnode_t *
2083 2086 i_lnode_alloc(int modid)
2084 2087 {
2085 2088 i_lnode_t *i_lnode;
2086 2089
2087 2090 i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);
2088 2091
2089 2092 ASSERT(modid != -1);
2090 2093 i_lnode->modid = modid;
2091 2094
2092 2095 return (i_lnode);
2093 2096 }
2094 2097
2095 2098 static void
2096 2099 i_lnode_free(i_lnode_t *i_lnode)
2097 2100 {
2098 2101 kmem_free(i_lnode, sizeof (i_lnode_t));
2099 2102 }
2100 2103
2101 2104 static void
2102 2105 i_lnode_check_free(i_lnode_t *i_lnode)
2103 2106 {
2104 2107 /* This lnode and its dip must have been snapshotted */
2105 2108 ASSERT(i_lnode->self > 0);
2106 2109 ASSERT(i_lnode->di_node->self > 0);
2107 2110
2108 2111 /* at least 1 link (in or out) must exist for this lnode */
2109 2112 ASSERT(i_lnode->link_in || i_lnode->link_out);
2110 2113
2111 2114 i_lnode_free(i_lnode);
2112 2115 }
2113 2116
2114 2117 static i_link_t *
2115 2118 i_link_alloc(int spec_type)
2116 2119 {
2117 2120 i_link_t *i_link;
2118 2121
2119 2122 i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
2120 2123 i_link->spec_type = spec_type;
2121 2124
2122 2125 return (i_link);
2123 2126 }
2124 2127
2125 2128 static void
2126 2129 i_link_check_free(i_link_t *i_link)
2127 2130 {
2128 2131 /* This link must have been snapshotted */
2129 2132 ASSERT(i_link->self > 0);
2130 2133
2131 2134 /* Both endpoint lnodes must exist for this link */
2132 2135 ASSERT(i_link->src_lnode);
2133 2136 ASSERT(i_link->tgt_lnode);
2134 2137
2135 2138 kmem_free(i_link, sizeof (i_link_t));
2136 2139 }
2137 2140
2138 2141 /*ARGSUSED*/
2139 2142 static uint_t
2140 2143 i_lnode_hashfunc(void *arg, mod_hash_key_t key)
2141 2144 {
2142 2145 i_lnode_t *i_lnode = (i_lnode_t *)key;
2143 2146 struct di_node *ptr;
2144 2147 dev_t dev;
2145 2148
2146 2149 dev = i_lnode->devt;
2147 2150 if (dev != DDI_DEV_T_NONE)
2148 2151 return (i_lnode->modid + getminor(dev) + getmajor(dev));
2149 2152
2150 2153 ptr = i_lnode->di_node;
2151 2154	if (ptr) {
2152 2155		uintptr_t k = (uintptr_t)ptr;
2153 2156		ASSERT(ptr->self > 0);	/* dereference only after NULL check */
2154 2157 k >>= (int)highbit(sizeof (struct di_node));
2155 2158 return ((uint_t)k);
2156 2159 }
2157 2160
2158 2161 return (i_lnode->modid);
2159 2162 }
2160 2163
2161 2164 static int
2162 2165 i_lnode_cmp(void *arg1, void *arg2)
2163 2166 {
2164 2167 i_lnode_t *i_lnode1 = (i_lnode_t *)arg1;
2165 2168 i_lnode_t *i_lnode2 = (i_lnode_t *)arg2;
2166 2169
2167 2170 if (i_lnode1->modid != i_lnode2->modid) {
2168 2171 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
2169 2172 }
2170 2173
2171 2174 if (i_lnode1->di_node != i_lnode2->di_node)
2172 2175 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);
2173 2176
2174 2177 if (i_lnode1->devt != i_lnode2->devt)
2175 2178 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);
2176 2179
2177 2180 return (0);
2178 2181 }
2179 2182
2180 2183 /*
2181 2184 * An lnode represents a {dip, dev_t} tuple. A link represents a
2182 2185 * {src_lnode, tgt_lnode, spec_type} tuple.
2183 2186 * The following callback assumes that LDI framework ref-counts the
2184 2187 * src_dip and tgt_dip while invoking this callback.
2185 2188 */
2186 2189 static int
2187 2190 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
2188 2191 {
2189 2192 struct di_state *st = (struct di_state *)arg;
2190 2193 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode;
2191 2194 i_link_t **i_link_next, *i_link;
2192 2195 di_off_t soff, toff;
2193 2196 mod_hash_val_t nodep = NULL;
2194 2197 int res;
2195 2198
2196 2199 /*
2197 2200	 * If the source or target of this device usage information doesn't
2198 2201	 * correspond to a device node, then we don't report it via
2199 2202	 * libdevinfo, so return.
2200 2203 */
2201 2204 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
2202 2205 return (LDI_USAGE_CONTINUE);
2203 2206
2204 2207 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
2205 2208 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));
2206 2209
2207 2210 /*
2208 2211 * Skip the ldi_usage if either src or tgt dip is not in the
2209 2212 * snapshot. This saves us from pruning bad lnodes/links later.
2210 2213 */
2211 2214 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
2212 2215 return (LDI_USAGE_CONTINUE);
2213 2216 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
2214 2217 return (LDI_USAGE_CONTINUE);
2215 2218
2216 2219 ASSERT(soff > 0);
2217 2220 ASSERT(toff > 0);
2218 2221
2219 2222 /*
2220 2223 * allocate an i_lnode and add it to the lnode hash
2221 2224 * if it is not already present. For this particular
2222 2225 * link the lnode is a source, but it may
2223 2226 * participate as tgt or src in any number of layered
2224 2227 * operations - so it may already be in the hash.
2225 2228 */
2226 2229 i_lnode = i_lnode_alloc(ldi_usage->src_modid);
2227 2230 i_lnode->di_node = DI_NODE(di_mem_addr(st, soff));
2228 2231 i_lnode->devt = ldi_usage->src_devt;
2229 2232
2230 2233 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2231 2234 if (res == MH_ERR_NOTFOUND) {
2232 2235 /*
2233 2236 * new i_lnode
2234 2237 * add it to the hash and increment the lnode count
2235 2238 */
2236 2239 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2237 2240 ASSERT(res == 0);
2238 2241 st->lnode_count++;
2239 2242 src_lnode = i_lnode;
2240 2243 } else {
2241 2244 /* this i_lnode already exists in the lnode_hash */
2242 2245 i_lnode_free(i_lnode);
2243 2246 src_lnode = (i_lnode_t *)nodep;
2244 2247 }
2245 2248
2246 2249 /*
2247 2250 * allocate a tgt i_lnode and add it to the lnode hash
2248 2251 */
2249 2252 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
2250 2253 i_lnode->di_node = DI_NODE(di_mem_addr(st, toff));
2251 2254 i_lnode->devt = ldi_usage->tgt_devt;
2252 2255
2253 2256 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2254 2257 if (res == MH_ERR_NOTFOUND) {
2255 2258 /*
2256 2259 * new i_lnode
2257 2260 * add it to the hash and increment the lnode count
2258 2261 */
2259 2262 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2260 2263 ASSERT(res == 0);
2261 2264 st->lnode_count++;
2262 2265 tgt_lnode = i_lnode;
2263 2266 } else {
2264 2267 /* this i_lnode already exists in the lnode_hash */
2265 2268 i_lnode_free(i_lnode);
2266 2269 tgt_lnode = (i_lnode_t *)nodep;
2267 2270 }
2268 2271
2269 2272 /*
2270 2273	 * allocate an i_link
2271 2274 */
2272 2275 i_link = i_link_alloc(ldi_usage->tgt_spec_type);
2273 2276 i_link->src_lnode = src_lnode;
2274 2277 i_link->tgt_lnode = tgt_lnode;
2275 2278
2276 2279 /*
2277 2280	 * add this link onto the src i_lnode's outbound i_link list
2278 2281 */
2279 2282 i_link_next = &(src_lnode->link_out);
2280 2283 while (*i_link_next != NULL) {
2281 2284 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
2282 2285 (i_link->spec_type == (*i_link_next)->spec_type)) {
2283 2286 /* this link already exists */
2284 2287 kmem_free(i_link, sizeof (i_link_t));
2285 2288 return (LDI_USAGE_CONTINUE);
2286 2289 }
2287 2290 i_link_next = &((*i_link_next)->src_link_next);
2288 2291 }
2289 2292 *i_link_next = i_link;
2290 2293
2291 2294 /*
2292 2295	 * add this link onto the tgt i_lnode's inbound i_link list
2293 2296 */
2294 2297 i_link_next = &(tgt_lnode->link_in);
2295 2298 while (*i_link_next != NULL) {
2296 2299 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
2297 2300 i_link_next = &((*i_link_next)->tgt_link_next);
2298 2301 }
2299 2302 *i_link_next = i_link;
2300 2303
2301 2304 /*
2302 2305 * add this i_link to the link hash
2303 2306 */
2304 2307 res = mod_hash_insert(st->link_hash, i_link, i_link);
2305 2308 ASSERT(res == 0);
2306 2309 st->link_count++;
2307 2310
2308 2311 return (LDI_USAGE_CONTINUE);
2309 2312 }
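
The outbound-list scan above uses a pointer-to-pointer cursor so that appending at the tail and detecting duplicates need no special head case. The same idiom in isolation, with an illustrative link type (not the driver's i_link_t):

	struct link {
		struct link	*next;
		int		tgt;		/* stand-in for tgt_lnode */
		int		spec_type;
	};

	/* Append l unless an equal link exists; return 0 if it was a duplicate. */
	static int
	link_append(struct link **head, struct link *l)
	{
		struct link	**lp;

		for (lp = head; *lp != NULL; lp = &(*lp)->next) {
			if ((*lp)->tgt == l->tgt &&
			    (*lp)->spec_type == l->spec_type)
				return (0);	/* caller frees l, as above */
		}
		*lp = l;
		l->next = NULL;
		return (1);
	}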
2310 2313
2311 2314 struct i_layer_data {
2312 2315 struct di_state *st;
2313 2316 int lnode_count;
2314 2317 int link_count;
2315 2318 di_off_t lnode_off;
2316 2319 di_off_t link_off;
2317 2320 };
2318 2321
2319 2322 /*ARGSUSED*/
2320 2323 static uint_t
2321 2324 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2322 2325 {
2323 2326 i_link_t *i_link = (i_link_t *)key;
2324 2327 struct i_layer_data *data = arg;
2325 2328 struct di_link *me;
2326 2329 struct di_lnode *melnode;
2327 2330 struct di_node *medinode;
2328 2331
2329 2332 ASSERT(i_link->self == 0);
2330 2333
2331 2334 i_link->self = data->link_off +
2332 2335 (data->link_count * sizeof (struct di_link));
2333 2336 data->link_count++;
2334 2337
2335 2338 ASSERT(data->link_off > 0 && data->link_count > 0);
2336 2339 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
2337 2340 ASSERT(data->link_count <= data->st->link_count);
2338 2341
2339 2342 /* fill in fields for the di_link snapshot */
2340 2343 me = DI_LINK(di_mem_addr(data->st, i_link->self));
2341 2344 me->self = i_link->self;
2342 2345 me->spec_type = i_link->spec_type;
2343 2346
2344 2347 /*
2345 2348 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
2346 2349 * are created during the LDI table walk. Since we are
2347 2350 * walking the link hash, the lnode hash has already been
2348 2351 * walked and the lnodes have been snapshotted. Save lnode
2349 2352 * offsets.
2350 2353 */
2351 2354 me->src_lnode = i_link->src_lnode->self;
2352 2355 me->tgt_lnode = i_link->tgt_lnode->self;
2353 2356
2354 2357 /*
2355 2358 * Save this link's offset in the src_lnode snapshot's link_out
2356 2359 * field
2357 2360 */
2358 2361 melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode));
2359 2362 me->src_link_next = melnode->link_out;
2360 2363 melnode->link_out = me->self;
2361 2364
2362 2365 /*
2363 2366 * Put this link on the tgt_lnode's link_in field
2364 2367 */
2365 2368 melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode));
2366 2369 me->tgt_link_next = melnode->link_in;
2367 2370 melnode->link_in = me->self;
2368 2371
2369 2372 /*
2370 2373 * An i_lnode_t is only created if the corresponding dip exists
2371 2374 * in the snapshot. A pointer to the di_node is saved in the
2372 2375 * i_lnode_t when it is allocated. For this link, get the di_node
2373 2376 * for the source lnode. Then put the link on the di_node's list
2374 2377 * of src links
2375 2378 */
2376 2379 medinode = i_link->src_lnode->di_node;
2377 2380 me->src_node_next = medinode->src_links;
2378 2381 medinode->src_links = me->self;
2379 2382
2380 2383 /*
2381 2384 * Put this link on the tgt_links list of the target
2382 2385 * dip.
2383 2386 */
2384 2387 medinode = i_link->tgt_lnode->di_node;
2385 2388 me->tgt_node_next = medinode->tgt_links;
2386 2389 medinode->tgt_links = me->self;
2387 2390
2388 2391 return (MH_WALK_CONTINUE);
2389 2392 }
2390 2393
2391 2394 /*ARGSUSED*/
2392 2395 static uint_t
2393 2396 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2394 2397 {
2395 2398 i_lnode_t *i_lnode = (i_lnode_t *)key;
2396 2399 struct i_layer_data *data = arg;
2397 2400 struct di_lnode *me;
2398 2401 struct di_node *medinode;
2399 2402
2400 2403 ASSERT(i_lnode->self == 0);
2401 2404
2402 2405 i_lnode->self = data->lnode_off +
2403 2406 (data->lnode_count * sizeof (struct di_lnode));
2404 2407 data->lnode_count++;
2405 2408
2406 2409 ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2407 2410 ASSERT(data->link_count == 0); /* links not done yet */
2408 2411 ASSERT(data->lnode_count <= data->st->lnode_count);
2409 2412
2410 2413 /* fill in fields for the di_lnode snapshot */
2411 2414 me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
2412 2415 me->self = i_lnode->self;
2413 2416
2414 2417 if (i_lnode->devt == DDI_DEV_T_NONE) {
2415 2418 me->dev_major = DDI_MAJOR_T_NONE;
2416 2419 me->dev_minor = DDI_MAJOR_T_NONE;
2417 2420 } else {
2418 2421 me->dev_major = getmajor(i_lnode->devt);
2419 2422 me->dev_minor = getminor(i_lnode->devt);
2420 2423 }
2421 2424
2422 2425 /*
2423 2426 * The dip corresponding to this lnode must exist in
2424 2427 * the snapshot or we wouldn't have created the i_lnode_t
2425 2428 * during LDI walk. Save the offset of the dip.
2426 2429 */
2427 2430 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2428 2431 me->node = i_lnode->di_node->self;
2429 2432
2430 2433 /*
2431 2434 * There must be at least one link in or out of this lnode
2432 2435 * or we wouldn't have created it. These fields will be set
2433 2436 * during the link hash walk.
2434 2437 */
2435 2438 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2436 2439
2437 2440 /*
2438 2441	 * Set the offset of the devinfo node associated with this
2439 2442	 * lnode. Also update the node_next pointer; it is set when
2440 2443	 * multiple lnodes are associated with the same devinfo node
2441 2444	 * (which can occur when multiple minor nodes are open for
2442 2445	 * one device, etc.).
2443 2446 */
2444 2447 medinode = i_lnode->di_node;
2445 2448 me->node_next = medinode->lnodes;
2446 2449 medinode->lnodes = me->self;
2447 2450
2448 2451 return (MH_WALK_CONTINUE);
2449 2452 }
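
Both walkers assign offsets arithmetically: the n-th record lands at the region base plus n times the record size, inside space that di_getlink_data() below reserves up front. Since the lnode hash is walked before the link hash, every i_lnode->self is final before i_link_walker reads it. A compact check of the placement math, with illustrative sizes:

	#include <assert.h>
	#include <stddef.h>

	/* Offset of the count-th fixed-size record within a reserved region. */
	static size_t
	place(size_t base, int count, size_t recsize)
	{
		return (base + (size_t)count * recsize);
	}

	int
	main(void)
	{
		size_t	lnode_base = 128, recsize = 48, link_base;
		int	lnode_count = 3;

		/* the link region starts only after all lnode records */
		link_base = place(lnode_base, lnode_count, recsize);
		assert(place(lnode_base, lnode_count - 1, recsize) < link_base);
		return (0);
	}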
2450 2453
2451 2454 static di_off_t
2452 2455 di_getlink_data(di_off_t off, struct di_state *st)
2453 2456 {
2454 2457 struct i_layer_data data = {0};
2455 2458 size_t size;
2456 2459
2457 2460	dcmn_err2((CE_CONT, "di_getlink_data: off = %x\n", off));
2458 2461
2459 2462 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2460 2463 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2461 2464 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2462 2465
2463 2466 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2464 2467 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2465 2468
2466 2469 /* get driver layering information */
2467 2470 (void) ldi_usage_walker(st, di_ldi_callback);
2468 2471
2469 2472 /* check if there is any link data to include in the snapshot */
2470 2473 if (st->lnode_count == 0) {
2471 2474 ASSERT(st->link_count == 0);
2472 2475 goto out;
2473 2476 }
2474 2477
2475 2478 ASSERT(st->link_count != 0);
2476 2479
2477 2480 /* get a pointer to snapshot memory for all the di_lnodes */
2478 2481 size = sizeof (struct di_lnode) * st->lnode_count;
2479 2482 data.lnode_off = off = di_checkmem(st, off, size);
2480 2483 off += size;
2481 2484
2482 2485 /* get a pointer to snapshot memory for all the di_links */
2483 2486 size = sizeof (struct di_link) * st->link_count;
2484 2487 data.link_off = off = di_checkmem(st, off, size);
2485 2488 off += size;
2486 2489
2487 2490 data.lnode_count = data.link_count = 0;
2488 2491 data.st = st;
2489 2492
2490 2493 /*
2491 2494 * We have lnodes and links that will go into the
2492 2495 * snapshot, so let's walk the respective hashes
2493 2496 * and snapshot them. The various linkages are
2494 2497 * also set up during the walk.
2495 2498 */
2496 2499 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2497 2500 ASSERT(data.lnode_count == st->lnode_count);
2498 2501
2499 2502 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2500 2503 ASSERT(data.link_count == st->link_count);
2501 2504
2502 2505 out:
2503 2506 /* free up the i_lnodes and i_links used to create the snapshot */
2504 2507 mod_hash_destroy_hash(st->lnode_hash);
2505 2508 mod_hash_destroy_hash(st->link_hash);
2506 2509 st->lnode_count = 0;
2507 2510 st->link_count = 0;
2508 2511
2509 2512 return (off);
2510 2513 }
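
The cleanup at the out label relies on the destructors registered at hash-creation time: mod_hash_destroy_hash() applies the value destructor (i_lnode_check_free / i_link_check_free) to every entry still in the table, which is what releases the temporary i_lnodes and i_links. A simplified user-space analogue of destroy-with-destructor over a chain:

	#include <stdlib.h>

	struct entry {
		struct entry	*next;
		void		*val;
	};

	/* Tear down a chain, running the value destructor on each entry. */
	static void
	destroy_all(struct entry *head, void (*vdtor)(void *))
	{
		while (head != NULL) {
			struct entry	*e = head;

			head = e->next;
			vdtor(e->val);		/* like i_lnode_check_free() */
			free(e);
		}
	}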
2511 2514
2512 2515
2513 2516 /*
2514 2517 * Copy all minor data nodes attached to a devinfo node into the snapshot.
2515 2518 * It is called from di_copynode with active ndi_devi_enter to protect
2516 2519 * the list of minor nodes.
2517 2520 */
2518 2521 static di_off_t
2519 2522 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2520 2523 struct di_state *st)
2521 2524 {
2522 2525 di_off_t off;
2523 2526 struct di_minor *me;
2524 2527 size_t size;
2525 2528
2526 2529 dcmn_err2((CE_CONT, "di_getmdata:\n"));
2527 2530
2528 2531 /*
2529 2532 * check memory first
2530 2533 */
2531 2534 off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2532 2535 *off_p = off;
2533 2536
2534 2537 do {
2535 2538 me = DI_MINOR(di_mem_addr(st, off));
2536 2539 me->self = off;
2537 2540 me->type = mnode->type;
2538 2541 me->node = node;
2539 2542 me->user_private_data = NULL;
2540 2543
2541 2544 off += sizeof (struct di_minor);
2542 2545
2543 2546 /*
2544 2547		 * Split dev_t into major/minor so it works for
2545 2548		 * both the ILP32 and LP64 models
2546 2549 */
2547 2550 me->dev_major = getmajor(mnode->ddm_dev);
2548 2551 me->dev_minor = getminor(mnode->ddm_dev);
2549 2552 me->spec_type = mnode->ddm_spec_type;
2550 2553
2551 2554 if (mnode->ddm_name) {
2552 2555 size = strlen(mnode->ddm_name) + 1;
2553 2556 me->name = off = di_checkmem(st, off, size);
2554 2557 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2555 2558 off += size;
2556 2559 }
2557 2560
2558 2561 if (mnode->ddm_node_type) {
2559 2562 size = strlen(mnode->ddm_node_type) + 1;
2560 2563 me->node_type = off = di_checkmem(st, off, size);
2561 2564 (void) strcpy(di_mem_addr(st, off),
2562 2565 mnode->ddm_node_type);
2563 2566 off += size;
2564 2567 }
2565 2568
2566 2569 off = di_checkmem(st, off, sizeof (struct di_minor));
2567 2570 me->next = off;
2568 2571 mnode = mnode->next;
2569 2572 } while (mnode);
2570 2573
2571 2574 me->next = 0;
2572 2575
2573 2576 return (off);
2574 2577 }
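
The loop above links each di_minor to the offset reserved for its successor before knowing whether one exists, then patches the final next field to 0 once the source list ends. The same serialize-a-list shape in user-space form, assuming a hypothetical record type:

	struct item {
		struct item	*next;
	};
	struct rec {
		size_t		next;		/* index of the following record */
	};

	/* Serialize a non-empty list; the last record gets next == 0. */
	static size_t
	serialize(struct item *it, struct rec *buf)
	{
		size_t		off = 0;
		struct rec	*r = NULL;

		do {
			r = &buf[off++];
			r->next = off;		/* assume another record follows */
			it = it->next;
		} while (it != NULL);

		r->next = 0;			/* patch the tail, as above */
		return (off);
	}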
2575 2578
2576 2579 /*
2577 2580  * di_register_dip(), di_dip_find(): The dip must be protected
2578 2581 * from deallocation when using these routines - this can either
2579 2582 * be a reference count, a busy hold or a per-driver lock.
2580 2583 */
2581 2584
2582 2585 static void
2583 2586 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2584 2587 {
2585 2588 struct dev_info *node = DEVI(dip);
2586 2589 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2587 2590 struct di_dkey *dk;
2588 2591
2589 2592 ASSERT(dip);
2590 2593 ASSERT(off > 0);
2591 2594
2592 2595 key->k_type = DI_DKEY;
2593 2596 dk = &(key->k_u.dkey);
2594 2597
2595 2598 dk->dk_dip = dip;
2596 2599 dk->dk_major = node->devi_major;
2597 2600 dk->dk_inst = node->devi_instance;
2598 2601 dk->dk_nodeid = node->devi_nodeid;
2599 2602
2600 2603 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2601 2604 (mod_hash_val_t)(uintptr_t)off) != 0) {
2602 2605 panic(
2603 2606 "duplicate devinfo (%p) registered during device "
2604 2607 "tree walk", (void *)dip);
2605 2608 }
2606 2609 }
2607 2610
2608 2611
2609 2612 static int
2610 2613 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2611 2614 {
2612 2615 /*
2613 2616 * uintptr_t must be used because it matches the size of void *;
2614 2617 * mod_hash expects clients to place results into pointer-size
2615 2618	 * containers; since di_off_t is only a 32-bit offset, the
2616 2619	 * pointer-sized store would overrun it on 64-bit kernels.
2617 2620 */
2618 2621 uintptr_t offset;
2619 2622 struct di_key key = {0};
2620 2623 struct di_dkey *dk;
2621 2624
2622 2625 ASSERT(st->reg_dip_hash);
2623 2626 ASSERT(dip);
2624 2627 ASSERT(off_p);
2625 2628
2626 2629
2627 2630 key.k_type = DI_DKEY;
2628 2631 dk = &(key.k_u.dkey);
2629 2632
2630 2633 dk->dk_dip = dip;
2631 2634 dk->dk_major = DEVI(dip)->devi_major;
2632 2635 dk->dk_inst = DEVI(dip)->devi_instance;
2633 2636 dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2634 2637
2635 2638 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2636 2639 (mod_hash_val_t *)&offset) == 0) {
2637 2640 *off_p = (di_off_t)offset;
2638 2641 return (0);
2639 2642 } else {
2640 2643 return (-1);
2641 2644 }
2642 2645 }
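
The uintptr_t temporary matters: mod_hash_find() stores a full pointer-sized value through its out parameter, so on a 64-bit kernel handing it the address of a 32-bit di_off_t would let the store clobber adjacent stack. A user-space sketch of the safe narrowing, where find() is only a stand-in for the hash lookup:

	#include <stdint.h>

	/* Stand-in lookup that writes a pointer-sized value, like mod_hash_find */
	static int
	find(uintptr_t *valp)
	{
		*valp = (uintptr_t)0x1234;	/* full-word store */
		return (0);
	}

	static int
	lookup32(uint32_t *off_p)
	{
		uintptr_t	offset;		/* pointer-sized container */

		if (find(&offset) == 0) {
			*off_p = (uint32_t)offset;	/* narrow after the store */
			return (0);
		}
		return (-1);
	}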
2643 2646
2644 2647 /*
2645 2648 * di_register_pip(), di_find_pip(): The pip must be protected from deallocation
2646 2649 * when using these routines. The caller must do this by protecting the
2647 2650 * client(or phci)<->pip linkage while traversing the list and then holding the
2648 2651 * pip when it is found in the list.
2649 2652 */
2650 2653
2651 2654 static void
2652 2655 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2653 2656 {
2654 2657 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2655 2658 char *path_addr;
2656 2659 struct di_pkey *pk;
2657 2660
2658 2661 ASSERT(pip);
2659 2662 ASSERT(off > 0);
2660 2663
2661 2664 key->k_type = DI_PKEY;
2662 2665 pk = &(key->k_u.pkey);
2663 2666
2664 2667 pk->pk_pip = pip;
2665 2668 path_addr = mdi_pi_get_addr(pip);
2666 2669 if (path_addr)
2667 2670 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2668 2671 pk->pk_client = mdi_pi_get_client(pip);
2669 2672 pk->pk_phci = mdi_pi_get_phci(pip);
2670 2673
2671 2674 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2672 2675 (mod_hash_val_t)(uintptr_t)off) != 0) {
2673 2676 panic(
2674 2677 "duplicate pathinfo (%p) registered during device "
2675 2678 "tree walk", (void *)pip);
2676 2679 }
2677 2680 }
2678 2681
2679 2682 /*
2680 2683 * As with di_register_pip, the caller must hold or lock the pip
2681 2684 */
2682 2685 static int
2683 2686 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2684 2687 {
2685 2688 /*
2686 2689 * uintptr_t must be used because it matches the size of void *;
2687 2690 * mod_hash expects clients to place results into pointer-size
2688 2691	 * containers; since di_off_t is only a 32-bit offset, the
2689 2692	 * pointer-sized store would overrun it on 64-bit kernels.
2690 2693 */
2691 2694 uintptr_t offset;
2692 2695 struct di_key key = {0};
2693 2696 struct di_pkey *pk;
2694 2697
2695 2698 ASSERT(st->reg_pip_hash);
2696 2699 ASSERT(off_p);
2697 2700
2698 2701 if (pip == NULL) {
2699 2702 *off_p = 0;
2700 2703 return (0);
2701 2704 }
2702 2705
2703 2706 key.k_type = DI_PKEY;
2704 2707 pk = &(key.k_u.pkey);
2705 2708
2706 2709 pk->pk_pip = pip;
2707 2710 pk->pk_path_addr = mdi_pi_get_addr(pip);
2708 2711 pk->pk_client = mdi_pi_get_client(pip);
2709 2712 pk->pk_phci = mdi_pi_get_phci(pip);
2710 2713
2711 2714 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2712 2715 (mod_hash_val_t *)&offset) == 0) {
2713 2716 *off_p = (di_off_t)offset;
2714 2717 return (0);
2715 2718 } else {
2716 2719 return (-1);
2717 2720 }
2718 2721 }
2719 2722
2720 2723 static di_path_state_t
2721 2724 path_state_convert(mdi_pathinfo_state_t st)
2722 2725 {
2723 2726 switch (st) {
2724 2727 case MDI_PATHINFO_STATE_ONLINE:
2725 2728 return (DI_PATH_STATE_ONLINE);
2726 2729 case MDI_PATHINFO_STATE_STANDBY:
2727 2730 return (DI_PATH_STATE_STANDBY);
2728 2731 case MDI_PATHINFO_STATE_OFFLINE:
2729 2732 return (DI_PATH_STATE_OFFLINE);
2730 2733 case MDI_PATHINFO_STATE_FAULT:
2731 2734 return (DI_PATH_STATE_FAULT);
2732 2735 default:
2733 2736 return (DI_PATH_STATE_UNKNOWN);
2734 2737 }
2735 2738 }
2736 2739
2737 2740 static uint_t
2738 2741 path_flags_convert(uint_t pi_path_flags)
2739 2742 {
2740 2743 uint_t di_path_flags = 0;
2741 2744
2742 2745 /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */
2743 2746
2744 2747 if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
2745 2748 di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED;
2746 2749
2747 2750 return (di_path_flags);
2748 2751 }
2749 2752
2750 2753
2751 2754 static di_off_t
2752 2755 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p,
2753 2756 struct di_state *st)
2754 2757 {
2755 2758 nvpair_t *prop = NULL;
2756 2759 struct di_path_prop *me;
2757 2760 int off;
2758 2761 size_t size;
2759 2762 char *str;
2760 2763 uchar_t *buf;
2761 2764 uint_t nelems;
2762 2765
2763 2766 off = *off_p;
2764 2767 if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
2765 2768 *off_p = 0;
2766 2769 return (off);
2767 2770 }
2768 2771
2769 2772 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2770 2773 *off_p = off;
2771 2774
2772 2775 while (prop = mdi_pi_get_next_prop(pip, prop)) {
2773 2776 me = DI_PATHPROP(di_mem_addr(st, off));
2774 2777 me->self = off;
2775 2778 off += sizeof (struct di_path_prop);
2776 2779
2777 2780 /*
2778 2781 * property name
2779 2782 */
2780 2783 size = strlen(nvpair_name(prop)) + 1;
2781 2784 me->prop_name = off = di_checkmem(st, off, size);
2782 2785 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
2783 2786 off += size;
2784 2787
2785 2788 switch (nvpair_type(prop)) {
2786 2789 case DATA_TYPE_BYTE:
2787 2790 case DATA_TYPE_INT16:
2788 2791 case DATA_TYPE_UINT16:
2789 2792 case DATA_TYPE_INT32:
2790 2793 case DATA_TYPE_UINT32:
2791 2794 me->prop_type = DDI_PROP_TYPE_INT;
2792 2795 size = sizeof (int32_t);
2793 2796 off = di_checkmem(st, off, size);
2794 2797 (void) nvpair_value_int32(prop,
2795 2798 (int32_t *)di_mem_addr(st, off));
2796 2799 break;
2797 2800
2798 2801 case DATA_TYPE_INT64:
2799 2802 case DATA_TYPE_UINT64:
2800 2803 me->prop_type = DDI_PROP_TYPE_INT64;
2801 2804 size = sizeof (int64_t);
2802 2805 off = di_checkmem(st, off, size);
2803 2806 (void) nvpair_value_int64(prop,
2804 2807 (int64_t *)di_mem_addr(st, off));
2805 2808 break;
2806 2809
2807 2810 case DATA_TYPE_STRING:
2808 2811 me->prop_type = DDI_PROP_TYPE_STRING;
2809 2812 (void) nvpair_value_string(prop, &str);
2810 2813 size = strlen(str) + 1;
2811 2814 off = di_checkmem(st, off, size);
2812 2815 (void) strcpy(di_mem_addr(st, off), str);
2813 2816 break;
2814 2817
2815 2818 case DATA_TYPE_BYTE_ARRAY:
2816 2819 case DATA_TYPE_INT16_ARRAY:
2817 2820 case DATA_TYPE_UINT16_ARRAY:
2818 2821 case DATA_TYPE_INT32_ARRAY:
2819 2822 case DATA_TYPE_UINT32_ARRAY:
2820 2823 case DATA_TYPE_INT64_ARRAY:
2821 2824 case DATA_TYPE_UINT64_ARRAY:
2822 2825 me->prop_type = DDI_PROP_TYPE_BYTE;
2823 2826 (void) nvpair_value_byte_array(prop, &buf, &nelems);
2824 2827 size = nelems;
2825 2828 if (nelems != 0) {
2826 2829 off = di_checkmem(st, off, size);
2827 2830 bcopy(buf, di_mem_addr(st, off), size);
2828 2831 }
2829 2832 break;
2830 2833
2831 2834 default: /* Unknown or unhandled type; skip it */
2832 2835 size = 0;
2833 2836 break;
2834 2837 }
2835 2838
2836 2839 if (size > 0) {
2837 2840 me->prop_data = off;
2838 2841 }
2839 2842
2840 2843 me->prop_len = (int)size;
2841 2844 off += size;
2842 2845
2843 2846 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2844 2847 me->prop_next = off;
2845 2848 }
2846 2849
2847 2850 me->prop_next = 0;
2848 2851 return (off);
2849 2852 }
2850 2853
2851 2854
2852 2855 static void
2853 2856 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2854 2857 int get_client)
2855 2858 {
2856 2859 if (get_client) {
2857 2860 ASSERT(me->path_client == 0);
2858 2861 me->path_client = noff;
2859 2862 ASSERT(me->path_c_link == 0);
2860 2863 *off_pp = &me->path_c_link;
2861 2864 me->path_snap_state &=
2862 2865 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2863 2866 } else {
2864 2867 ASSERT(me->path_phci == 0);
2865 2868 me->path_phci = noff;
2866 2869 ASSERT(me->path_p_link == 0);
2867 2870 *off_pp = &me->path_p_link;
2868 2871 me->path_snap_state &=
2869 2872 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2870 2873 }
2871 2874 }
2872 2875
2873 2876 /*
2874 2877 * off_p: pointer to the linkage field. This links pips along the client|phci
2875 2878 * linkage list.
2876 2879 * noff : Offset for the endpoint dip snapshot.
2877 2880 */
2878 2881 static di_off_t
2879 2882 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff,
2880 2883 struct di_state *st, int get_client)
2881 2884 {
2882 2885 di_off_t off;
2883 2886 mdi_pathinfo_t *pip;
2884 2887 struct di_path *me;
2885 2888 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2886 2889 size_t size;
2887 2890
2888 2891 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2889 2892
2890 2893 /*
2891 2894 * The naming of the following mdi_xyz() is unfortunately
2892 2895 * non-intuitive. mdi_get_next_phci_path() follows the
2893 2896	 * client_link, i.e. the list of pips belonging to the
2894 2897 * given client dip.
2895 2898 */
2896 2899 if (get_client)
2897 2900 next_pip = &mdi_get_next_phci_path;
2898 2901 else
2899 2902 next_pip = &mdi_get_next_client_path;
2900 2903
2901 2904 off = *off_p;
2902 2905
2903 2906 pip = NULL;
2904 2907 while (pip = (*next_pip)(dip, pip)) {
2905 2908 di_off_t stored_offset;
2906 2909
2907 2910 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2908 2911
2909 2912 mdi_pi_lock(pip);
2910 2913
2911 2914 /* We don't represent hidden paths in the snapshot */
2912 2915 if (mdi_pi_ishidden(pip)) {
2913 2916 dcmn_err((CE_WARN, "hidden, skip"));
2914 2917 mdi_pi_unlock(pip);
2915 2918 continue;
2916 2919 }
2917 2920
2918 2921 if (di_pip_find(st, pip, &stored_offset) != -1) {
2919 2922 /*
2920 2923 * We've already seen this pathinfo node so we need to
2921 2924			 * take care not to snap it again. However, one endpoint
2922 2925 * and linkage will be set here. The other endpoint
2923 2926 * and linkage has already been set when the pip was
2924 2927 * first snapshotted i.e. when the other endpoint dip
2925 2928 * was snapshotted.
2926 2929 */
2927 2930 me = DI_PATH(di_mem_addr(st, stored_offset));
2928 2931 *off_p = stored_offset;
2929 2932
2930 2933 di_path_one_endpoint(me, noff, &off_p, get_client);
2931 2934
2932 2935 /*
2933 2936 * The other endpoint and linkage were set when this
2934 2937 * pip was snapshotted. So we are done with both
2935 2938 * endpoints and linkages.
2936 2939 */
2937 2940 ASSERT(!(me->path_snap_state &
2938 2941 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2939 2942 ASSERT(!(me->path_snap_state &
2940 2943 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2941 2944
2942 2945 mdi_pi_unlock(pip);
2943 2946 continue;
2944 2947 }
2945 2948
2946 2949 /*
2947 2950 * Now that we need to snapshot this pip, check memory
2948 2951 */
2949 2952 size = sizeof (struct di_path);
2950 2953 *off_p = off = di_checkmem(st, off, size);
2951 2954 me = DI_PATH(di_mem_addr(st, off));
2952 2955 me->self = off;
2953 2956 off += size;
2954 2957
2955 2958 me->path_snap_state =
2956 2959 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2957 2960 me->path_snap_state |=
2958 2961 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2959 2962
2960 2963 /*
2961 2964 * Zero out fields as di_checkmem() doesn't guarantee
2962 2965 * zero-filled memory
2963 2966 */
2964 2967 me->path_client = me->path_phci = 0;
2965 2968 me->path_c_link = me->path_p_link = 0;
2966 2969
2967 2970 di_path_one_endpoint(me, noff, &off_p, get_client);
2968 2971
2969 2972 /*
2970 2973 * Note the existence of this pathinfo
2971 2974 */
2972 2975 di_register_pip(st, pip, me->self);
2973 2976
2974 2977 me->path_state = path_state_convert(mdi_pi_get_state(pip));
2975 2978 me->path_flags = path_flags_convert(mdi_pi_get_flags(pip));
2976 2979
2977 2980 me->path_instance = mdi_pi_get_path_instance(pip);
2978 2981
2979 2982 /*
2980 2983 * Get intermediate addressing info.
2981 2984 */
2982 2985 size = strlen(mdi_pi_get_addr(pip)) + 1;
2983 2986 me->path_addr = off = di_checkmem(st, off, size);
2984 2987 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2985 2988 off += size;
2986 2989
2987 2990 /*
2988 2991 * Get path properties if props are to be included in the
2989 2992 * snapshot
2990 2993 */
2991 2994 if (DINFOPROP & st->command) {
2992 2995 me->path_prop = off;
2993 2996 off = di_path_getprop(pip, &me->path_prop, st);
2994 2997 } else {
2995 2998 me->path_prop = 0;
2996 2999 }
2997 3000
2998 3001 mdi_pi_unlock(pip);
2999 3002 }
3000 3003
3001 3004 *off_p = 0;
3002 3005 return (off);
3003 3006 }
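
Each pathinfo node is reachable from two directions, its client dip and its phci dip, so it is snapshotted on the first visit and only has its remaining endpoint and linkage filled on the second. A stripped-down model of the two-visit protocol, with illustrative flags mirroring DI_PATH_SNAP_*:

	#include <assert.h>
	#include <stddef.h>

	#define	SNAP_NOCLIENT	0x1
	#define	SNAP_NOPHCI	0x2

	struct path {
		unsigned	snap_state;
		size_t		client, phci;
	};

	/* Fill one endpoint and clear its pending bit, like di_path_one_endpoint */
	static void
	set_endpoint(struct path *p, size_t noff, int get_client)
	{
		if (get_client) {
			p->client = noff;
			p->snap_state &= ~SNAP_NOCLIENT;
		} else {
			p->phci = noff;
			p->snap_state &= ~SNAP_NOPHCI;
		}
	}

	int
	main(void)
	{
		struct path	p = { SNAP_NOCLIENT | SNAP_NOPHCI, 0, 0 };

		set_endpoint(&p, 100, 1);	/* first visit: client side */
		set_endpoint(&p, 200, 0);	/* second visit: phci side */
		assert(p.snap_state == 0);	/* both endpoints resolved */
		return (0);
	}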
3004 3007
3005 3008 /*
3006 3009 * Return driver prop_op entry point for the specified devinfo node.
3007 3010 *
3008 3011 * To return a non-NULL value:
3009 3012 * - driver must be attached and held:
3010 3013 * If driver is not attached we ignore the driver property list.
3011 3014 * No one should rely on such properties.
3012 3015 * - driver "cb_prop_op != ddi_prop_op":
3013 3016 * If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
3014 3017 * XXX or parent's bus_prop_op != ddi_bus_prop_op
3015 3018 */
3016 3019 static int
3017 3020 (*di_getprop_prop_op(struct dev_info *dip))
3018 3021 (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
3019 3022 {
3020 3023 struct dev_ops *ops;
3021 3024
3022 3025 /* If driver is not attached we ignore the driver property list. */
3023 3026 if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
3024 3027 return (NULL);
3025 3028
3026 3029 /*
3027 3030 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
3028 3031 * or even NULL.
3029 3032 */
3030 3033 ops = dip->devi_ops;
3031 3034 if (ops && ops->devo_cb_ops &&
3032 3035 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
3033 3036 (ops->devo_cb_ops->cb_prop_op != nodev) &&
3034 3037 (ops->devo_cb_ops->cb_prop_op != nulldev) &&
3035 3038 (ops->devo_cb_ops->cb_prop_op != NULL))
3036 3039 return (ops->devo_cb_ops->cb_prop_op);
3037 3040 return (NULL);
3038 3041 }
3039 3042
3040 3043 static di_off_t
3041 3044 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
3042 3045 int (*prop_op)(),
3043 3046 char *name, dev_t devt, int aflags, int alen, caddr_t aval,
3044 3047 di_off_t off, di_off_t **off_pp)
3045 3048 {
3046 3049 int need_free = 0;
3047 3050 dev_t pdevt;
3048 3051 int pflags;
3049 3052 int rv;
3050 3053 caddr_t val;
3051 3054 int len;
3052 3055 size_t size;
3053 3056 struct di_prop *pp;
3054 3057
3055 3058 /* If we have prop_op function, ask driver for latest value */
3056 3059 if (prop_op) {
3057 3060 ASSERT(dip);
3058 3061
3059 3062 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
3060 3063 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;
3061 3064
3062 3065 /*
3063 3066 * We have type information in flags, but are invoking an
3064 3067 * old non-typed prop_op(9E) interface. Since not all types are
3065 3068 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
3066 3069 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
3067 3070 * expand type bits beyond DDI_PROP_TYPE_ANY. This allows us
3068 3071 * to use the legacy prop_op(9E) interface to obtain updates
3070 3073		 * of non-DDI_PROP_TYPE_ANY dynamic properties.
3070 3073 */
3071 3074 pflags = aflags & ~DDI_PROP_TYPE_MASK;
3072 3075 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
3073 3076 DDI_PROP_CONSUMER_TYPED;
3074 3077
3075 3078 /*
3076 3079 * Hold and exit across prop_op(9E) to avoid lock order
3077 3080 * issues between
3078 3081 * [ndi_devi_enter() ..prop_op(9E).. driver-lock]
3079 3082 * .vs.
3080 3083 * [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F)..
3081 3084 * ndi_devi_enter()]
3082 3085 * ordering.
3083 3086 */
3084 3087 ndi_hold_devi((dev_info_t *)dip);
3085 3088 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular);
3086 3089 rv = (*prop_op)(pdevt, (dev_info_t *)dip,
3087 3090 PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len);
3088 3091 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular);
3089 3092 ndi_rele_devi((dev_info_t *)dip);
3090 3093
3091 3094 if (rv == DDI_PROP_SUCCESS) {
3092 3095 need_free = 1; /* dynamic prop obtained */
3093 3096 } else if (dyn) {
3094 3097 /*
3095 3098			 * A dynamic property shows up in the snapshot only
3096 3099			 * when prop_op(9E) succeeds; that call is the sole
3097 3100			 * source of its value.
3098 3101 */
3099 3102 return (off); /* dynamic prop not supported */
3100 3103 } else {
3101 3104 /*
3102 3105			 * In case calling the driver caused an update via
3103 3106			 * prop_op(9E) of a non-dynamic property (code leading
3104 3107			 * to ddi_prop_change), we defer picking up val and
3105 3108			 * len information until after prop_op(9E) to ensure
3106 3109 * that we snapshot the latest value.
3107 3110 */
3108 3111 val = aval;
3109 3112 len = alen;
3110 3113
3111 3114 }
3112 3115 } else {
3113 3116 val = aval;
3114 3117 len = alen;
3115 3118 }
3116 3119
3117 3120 dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
3118 3121 list, name ? name : "NULL", len, (void *)val));
3119 3122
3120 3123 size = sizeof (struct di_prop);
3121 3124 **off_pp = off = di_checkmem(st, off, size);
3122 3125 pp = DI_PROP(di_mem_addr(st, off));
3123 3126 pp->self = off;
3124 3127 off += size;
3125 3128
3126 3129 pp->dev_major = getmajor(devt);
3127 3130 pp->dev_minor = getminor(devt);
3128 3131 pp->prop_flags = aflags;
3129 3132 pp->prop_list = list;
3130 3133
3131 3134 /* property name */
3132 3135 if (name) {
3133 3136 size = strlen(name) + 1;
3134 3137 pp->prop_name = off = di_checkmem(st, off, size);
3135 3138 (void) strcpy(di_mem_addr(st, off), name);
3136 3139 off += size;
3137 3140 } else {
3138 3141 pp->prop_name = -1;
3139 3142 }
3140 3143
3141 3144 pp->prop_len = len;
3142 3145 if (val == NULL) {
3143 3146 pp->prop_data = -1;
3144 3147 } else if (len != 0) {
3145 3148 size = len;
3146 3149 pp->prop_data = off = di_checkmem(st, off, size);
3147 3150 bcopy(val, di_mem_addr(st, off), size);
3148 3151 off += size;
3149 3152 }
3150 3153
3151 3154 pp->next = 0; /* assume tail for now */
3152 3155 *off_pp = &pp->next; /* return pointer to our next */
3153 3156
3154 3157 if (need_free) /* free PROP_LEN_AND_VAL_ALLOC alloc */
3155 3158 kmem_free(val, len);
3156 3159 return (off);
3157 3160 }
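
The hold-exit-call-enter-release sequence around prop_op(9E) above is a general deadlock-avoidance shape: pin the object so it cannot go away, drop your lock before calling foreign code that may take locks in the opposite order, then reacquire. A self-contained pthread sketch of the shape (the caller is assumed to hold o->lock on entry; illustrative types, not the DDI API):

	#include <pthread.h>

	struct obj {
		pthread_mutex_t	lock;
		int		holds;		/* pin count; object freed at zero */
	};

	static void
	call_out(struct obj *o, void (*cb)(struct obj *))
	{
		o->holds++;				/* like ndi_hold_devi() */
		(void) pthread_mutex_unlock(&o->lock);	/* like ndi_devi_exit() */
		cb(o);					/* may lock in its own order */
		(void) pthread_mutex_lock(&o->lock);	/* like ndi_devi_enter() */
		o->holds--;				/* like ndi_rele_devi() */
	}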
3158 3161
3159 3162
3160 3163 /*
3161 3164 * Copy a list of properties attached to a devinfo node. Called from
3162 3165 * di_copynode with active ndi_devi_enter. The major number is passed in case
3163 3166  * we need to call the driver's prop_op entry. The value of list indicates
3164 3167 * which list we are copying. Possible values are:
3165 3168 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
3166 3169 */
3167 3170 static di_off_t
3168 3171 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p,
3169 3172 struct di_state *st, struct dev_info *dip)
3170 3173 {
3171 3174 struct ddi_prop *prop;
3172 3175 int (*prop_op)();
3173 3176 int off;
3174 3177 struct ddi_minor_data *mn;
3175 3178 i_ddi_prop_dyn_t *dp;
3176 3179 struct plist {
3177 3180 struct plist *pl_next;
3178 3181 char *pl_name;
3179 3182 int pl_flags;
3180 3183 dev_t pl_dev;
3181 3184 int pl_len;
3182 3185 caddr_t pl_val;
3183 3186 } *pl, *pl0, **plp;
3184 3187
3185 3188 ASSERT(st != NULL);
3186 3189
3187 3190 off = *off_p;
3188 3191 *off_p = 0;
3189 3192 dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n",
3190 3193 list, (void *)*pprop));
3191 3194
3192 3195 /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */
3193 3196 prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL;
3194 3197
3195 3198 /*
3196 3199 * Form private list of properties, holding devi_lock for properties
3197 3200 * that hang off the dip.
3198 3201 */
3199 3202 if (dip)
3200 3203 mutex_enter(&(dip->devi_lock));
3201 3204 for (pl0 = NULL, plp = &pl0, prop = *pprop;
3202 3205 prop; plp = &pl->pl_next, prop = prop->prop_next) {
3203 3206 pl = kmem_alloc(sizeof (*pl), KM_SLEEP);
3204 3207 *plp = pl;
3205 3208 pl->pl_next = NULL;
3206 3209 if (prop->prop_name)
3207 3210 pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP);
3208 3211 else
3209 3212 pl->pl_name = NULL;
3210 3213 pl->pl_flags = prop->prop_flags;
3211 3214 pl->pl_dev = prop->prop_dev;
3212 3215 if (prop->prop_len) {
3213 3216 pl->pl_len = prop->prop_len;
3214 3217 pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP);
3215 3218 bcopy(prop->prop_val, pl->pl_val, pl->pl_len);
3216 3219 } else {
3217 3220 pl->pl_len = 0;
3218 3221 pl->pl_val = NULL;
3219 3222 }
3220 3223 }
3221 3224 if (dip)
3222 3225 mutex_exit(&(dip->devi_lock));
3223 3226
3224 3227 /*
3225 3228 * Now that we have dropped devi_lock, perform a second-pass to
3226 3229 * add properties to the snapshot. We do this as a second pass
3227 3230 * because we may need to call prop_op(9E) and we can't hold
3228 3231 * devi_lock across that call.
3229 3232 */
3230 3233 for (pl = pl0; pl; pl = pl0) {
3231 3234 pl0 = pl->pl_next;
3232 3235 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name,
3233 3236 pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val,
3234 3237 off, &off_p);
3235 3238 if (pl->pl_val)
3236 3239 kmem_free(pl->pl_val, pl->pl_len);
3237 3240 if (pl->pl_name)
3238 3241 kmem_free(pl->pl_name, strlen(pl->pl_name) + 1);
3239 3242 kmem_free(pl, sizeof (*pl));
3240 3243 }
3241 3244
3242 3245 /*
3243 3246 * If there is no prop_op or dynamic property support has been
3244 3247 * disabled, we are done.
3245 3248 */
3246 3249 if ((prop_op == NULL) || (di_prop_dyn == 0)) {
3247 3250 *off_p = 0;
3248 3251 return (off);
3249 3252 }
3250 3253
3251 3254 /* Add dynamic driver properties to snapshot */
3252 3255 for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip);
3253 3256 dp && dp->dp_name; dp++) {
3254 3257 if (dp->dp_spec_type) {
3255 3258 /* if spec_type, property of matching minor */
3256 3259 ASSERT(DEVI_BUSY_OWNED(dip));
3257 3260 for (mn = dip->devi_minor; mn; mn = mn->next) {
3258 3261 if (mn->ddm_spec_type != dp->dp_spec_type)
3259 3262 continue;
3260 3263 off = di_getprop_add(list, 1, st, dip, prop_op,
3261 3264 dp->dp_name, mn->ddm_dev, dp->dp_type,
3262 3265 0, NULL, off, &off_p);
3263 3266 }
3264 3267 } else {
3265 3268 /* property of devinfo node */
3266 3269 off = di_getprop_add(list, 1, st, dip, prop_op,
3267 3270 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3268 3271 0, NULL, off, &off_p);
3269 3272 }
3270 3273 }
3271 3274
3272 3275 /* Add dynamic parent properties to snapshot */
3273 3276 for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip);
3274 3277 dp && dp->dp_name; dp++) {
3275 3278 if (dp->dp_spec_type) {
3276 3279 /* if spec_type, property of matching minor */
3277 3280 ASSERT(DEVI_BUSY_OWNED(dip));
3278 3281 for (mn = dip->devi_minor; mn; mn = mn->next) {
3279 3282 if (mn->ddm_spec_type != dp->dp_spec_type)
3280 3283 continue;
3281 3284 off = di_getprop_add(list, 1, st, dip, prop_op,
3282 3285 dp->dp_name, mn->ddm_dev, dp->dp_type,
3283 3286 0, NULL, off, &off_p);
3284 3287 }
3285 3288 } else {
3286 3289 /* property of devinfo node */
3287 3290 off = di_getprop_add(list, 1, st, dip, prop_op,
3288 3291 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3289 3292 0, NULL, off, &off_p);
3290 3293 }
3291 3294 }
3292 3295
3293 3296 *off_p = 0;
3294 3297 return (off);
3295 3298 }
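
di_getprop() deliberately copies in two passes: pass one duplicates the property list into private memory while devi_lock is held, and pass two marshals the copies after the lock is dropped, because di_getprop_add() may call prop_op(9E) and must not do so under devi_lock. A user-space reduction of pass one (allocations unchecked, mirroring KM_SLEEP which cannot fail):

	#include <pthread.h>
	#include <stdlib.h>
	#include <string.h>

	struct prop {
		struct prop	*next;
		char		*val;
		size_t		len;
	};

	/* Pass 1: clone the list under the lock so pass 2 may block freely. */
	static struct prop *
	snapshot_list(pthread_mutex_t *mp, struct prop *src)
	{
		struct prop	*copy = NULL, **plp = &copy;

		(void) pthread_mutex_lock(mp);
		for (; src != NULL; src = src->next) {
			struct prop	*pl = malloc(sizeof (*pl));

			pl->next = NULL;
			pl->len = src->len;
			pl->val = malloc(pl->len);
			(void) memcpy(pl->val, src->val, pl->len);
			*plp = pl;
			plp = &pl->next;
		}
		(void) pthread_mutex_unlock(mp);
		return (copy);
	}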
3296 3299
3297 3300 /*
3298 3301 * find private data format attached to a dip
3299 3302 * parent = 1 to match driver name of parent dip (for parent private data)
3300 3303 * 0 to match driver name of current dip (for driver private data)
3301 3304 */
3302 3305 #define DI_MATCH_DRIVER 0
3303 3306 #define DI_MATCH_PARENT 1
3304 3307
3305 3308 struct di_priv_format *
3306 3309 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3307 3310 {
3308 3311 int i, count, len;
3309 3312 char *drv_name;
3310 3313 major_t major;
3311 3314 struct di_all *all;
3312 3315 struct di_priv_format *form;
3313 3316
3314 3317 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3315 3318 node->devi_node_name, match));
3316 3319
3317 3320 if (match == DI_MATCH_PARENT) {
3318 3321 node = DEVI(node->devi_parent);
3319 3322 }
3320 3323
3321 3324 if (node == NULL) {
3322 3325 return (NULL);
3323 3326 }
3324 3327
3325 3328 major = node->devi_major;
3326 3329 if (major == (major_t)(-1)) {
3327 3330 return (NULL);
3328 3331 }
3329 3332
3330 3333 /*
3331 3334 * Match the driver name.
3332 3335 */
3333 3336 drv_name = ddi_major_to_name(major);
3334 3337 if ((drv_name == NULL) || *drv_name == '\0') {
3335 3338 return (NULL);
3336 3339 }
3337 3340
3338 3341 /* Now get the di_priv_format array */
3339 3342 all = DI_ALL_PTR(st);
3340 3343 if (match == DI_MATCH_PARENT) {
3341 3344 count = all->n_ppdata;
3342 3345 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3343 3346 } else {
3344 3347 count = all->n_dpdata;
3345 3348 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3346 3349 }
3347 3350
3348 3351 len = strlen(drv_name);
3349 3352 for (i = 0; i < count; i++) {
3350 3353 char *tmp;
3351 3354
3352 3355 tmp = form[i].drv_name;
3353 3356 while (tmp && (*tmp != '\0')) {
3354 3357 if (strncmp(drv_name, tmp, len) == 0) {
3355 3358 return (&form[i]);
3356 3359 }
3357 3360 /*
3358 3361 * Move to next driver name, skipping a white space
3359 3362 */
3360 3363 if (tmp = strchr(tmp, ' ')) {
3361 3364 tmp++;
3362 3365 }
3363 3366 }
3364 3367 }
3365 3368
3366 3369 return (NULL);
3367 3370 }
3368 3371
3369 3372 /*
3370 3373 * The following functions copy data as specified by the format passed in.
3371 3374  * To prevent an invalid format from panicking the system, we call on_fault().
3372 3375 * A return value of 0 indicates an error. Otherwise, the total offset
3373 3376 * is returned.
3374 3377 */
3375 3378 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */
3376 3379
3377 3380 static di_off_t
3378 3381 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3379 3382 void *data, di_off_t *off_p, struct di_state *st)
3380 3383 {
3381 3384 caddr_t pa;
3382 3385 void *ptr;
3383 3386 int i, size, repeat;
3384 3387 di_off_t off, off0, *tmp;
3385 3388 char *path;
3386 3389 label_t ljb;
3387 3390
3388 3391 dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3389 3392
3390 3393 /*
3391 3394 * check memory availability. Private data size is
3392 3395 * limited to DI_MAX_PRIVDATA.
3393 3396 */
3394 3397 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3395 3398 *off_p = off;
3396 3399
3397 3400 if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3398 3401 goto failure;
3399 3402 }
3400 3403
3401 3404 if (!on_fault(&ljb)) {
3402 3405 /* copy the struct */
3403 3406 bcopy(data, di_mem_addr(st, off), pdp->bytes);
3404 3407 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */
3405 3408
3406 3409 /* dereferencing pointers */
3407 3410 for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3408 3411
3409 3412 if (pdp->ptr[i].size == 0) {
3410 3413 goto success; /* no more ptrs */
3411 3414 }
3412 3415
3413 3416 /*
3414 3417 * first, get the pointer content
3415 3418 */
3416 3419 if ((pdp->ptr[i].offset < 0) ||
3417 3420 (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
3418 3421 goto failure; /* wrong offset */
3419 3422
3420 3423 pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3421 3424
3422 3425 /* save a tmp ptr to store off_t later */
3423 3426 tmp = (di_off_t *)(intptr_t)pa;
3424 3427
3425 3428 /* get pointer value, if NULL continue */
3426 3429 ptr = *((void **) (intptr_t)pa);
3427 3430 if (ptr == NULL) {
3428 3431 continue;
3429 3432 }
3430 3433
3431 3434 /*
3432 3435 * next, find the repeat count (array dimension)
3433 3436 */
3434 3437 repeat = pdp->ptr[i].len_offset;
3435 3438
3436 3439 /*
3437 3440			 * A zero or positive value is the offset of an int
3438 3441			 * member of the structure holding the array length
3439 3442			 * (variable sized array).
3440 3443			 * A negative value indicates a fixed sized array
3441 3444			 * whose dimension is the absolute value of
3442 3445			 * len_offset.
3443 3446 */
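			/*
			 * Illustrative example with a hypothetical
			 * structure:
			 *
			 *	struct xx_data {
			 *		int		xx_nregs;
			 *		struct regspec	*xx_regs;
			 *	};
			 *
			 * ptr[i].offset = offsetof(struct xx_data, xx_regs)
			 * with ptr[i].len_offset = offsetof(struct xx_data,
			 * xx_nregs) copies xx_nregs elements, while
			 * len_offset = -4 would copy a fixed array of 4.
			 */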
3444 3447 if (repeat > pdp->bytes - sizeof (int)) {
3445 3448 goto failure; /* wrong offset */
3446 3449 }
3447 3450
3448 3451 if (repeat >= 0) {
3449 3452 repeat = *((int *)
3450 3453 (intptr_t)((caddr_t)data + repeat));
3451 3454 } else {
3452 3455 repeat = -repeat;
3453 3456 }
3454 3457
3455 3458 /*
3456 3459 * next, get the size of the object to be copied
3457 3460 */
3458 3461 size = pdp->ptr[i].size * repeat;
3459 3462
3460 3463 /*
3461 3464 * Arbitrarily limit the total size of the object to be
3462 3465 * copied (1 byte up to what remains of DI_MAX_PRIVDATA).
3463 3466 */
3464 3467 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3465 3468 goto failure; /* wrong size or too big */
3466 3469 }
3467 3470
3468 3471 /*
3469 3472 * Now copy the data
3470 3473 */
3471 3474 *tmp = off0;
3472 3475 bcopy(ptr, di_mem_addr(st, off + off0), size);
3473 3476 off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */
3474 3477 }
3475 3478 } else {
3476 3479 goto failure;
3477 3480 }
3478 3481
3479 3482 success:
3480 3483 /*
3481 3484 * success if reached here
3482 3485 */
3483 3486 no_fault();
3484 3487 return (off + off0);
3485 3488 /*NOTREACHED*/
3486 3489
3487 3490 failure:
3488 3491 /*
3489 3492 * a fault occurred or the format was invalid
3490 3493 */
3491 3494 no_fault();
3492 3495 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3493 3496 cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3494 3497 ddi_pathname((dev_info_t *)node, path), data);
3495 3498 kmem_free(path, MAXPATHLEN);
3496 3499 *off_p = -1; /* set private data to indicate error */
3497 3500
3498 3501 return (off);
3499 3502 }
3500 3503
3501 3504 /*
3502 3505 * get parent private data; on error, returns original offset
3503 3506 */
3504 3507 static di_off_t
3505 3508 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3506 3509 {
3507 3510 int off;
3508 3511 struct di_priv_format *ppdp;
3509 3512
3510 3513 dcmn_err2((CE_CONT, "di_getppdata:\n"));
3511 3514
3512 3515 /* find the parent data format */
3513 3516 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3514 3517 off = *off_p;
3515 3518 *off_p = 0; /* set parent data to none */
3516 3519 return (off);
3517 3520 }
3518 3521
3519 3522 return (di_getprvdata(ppdp, node,
3520 3523 ddi_get_parent_data((dev_info_t *)node), off_p, st));
3521 3524 }
3522 3525
3523 3526 /*
3524 3527 * get driver private data; on error, returns original offset
3525 3528 */
3526 3529 static di_off_t
3527 3530 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3528 3531 {
3529 3532 int off;
3530 3533 struct di_priv_format *dpdp;
3531 3534
3532 3535 dcmn_err2((CE_CONT, "di_getdpdata:\n"));
3533 3536
3534 3537 /* find the driver data format */
3535 3538 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3536 3539 off = *off_p;
3537 3540 *off_p = 0; /* set driver data to none */
3538 3541 return (off);
3539 3542 }
3540 3543
3541 3544 return (di_getprvdata(dpdp, node,
3542 3545 ddi_get_driver_private((dev_info_t *)node), off_p, st));
3543 3546 }
3544 3547
3545 3548 /*
3546 3549 * Copy hotplug data associated with a devinfo node into the snapshot.
3547 3550 */
3548 3551 static di_off_t
3549 3552 di_gethpdata(ddi_hp_cn_handle_t *hp_hdl, di_off_t *off_p,
3550 3553 struct di_state *st)
3551 3554 {
3552 3555 struct i_hp *hp;
3553 3556 struct di_hp *me;
3554 3557 size_t size;
3555 3558 di_off_t off;
3556 3559
3557 3560 dcmn_err2((CE_CONT, "di_gethpdata:\n"));
3558 3561
3559 3562 /*
3560 3563 * check memory first
3561 3564 */
3562 3565 off = di_checkmem(st, *off_p, sizeof (struct di_hp));
3563 3566 *off_p = off;
3564 3567
3565 3568 do {
3566 3569 me = DI_HP(di_mem_addr(st, off));
3567 3570 me->self = off;
3568 3571 me->hp_name = 0;
3569 3572 me->hp_connection = (int)hp_hdl->cn_info.cn_num;
3570 3573 me->hp_depends_on = (int)hp_hdl->cn_info.cn_num_dpd_on;
3571 3574 (void) ddihp_cn_getstate(hp_hdl);
3572 3575 me->hp_state = (int)hp_hdl->cn_info.cn_state;
3573 3576 me->hp_type = (int)hp_hdl->cn_info.cn_type;
3574 3577 me->hp_type_str = 0;
3575 3578 me->hp_last_change = (uint32_t)hp_hdl->cn_info.cn_last_change;
3576 3579 me->hp_child = 0;
3577 3580
3578 3581 /*
3579 3582 * Child links are resolved later by di_hotplug_children().
3580 3583 * Store a reference to this di_hp_t in the list that
3581 3584 * routine consumes.
3582 3585 */
3583 3586 hp = kmem_zalloc(sizeof (i_hp_t), KM_SLEEP);
3584 3587 hp->hp_off = off;
3585 3588 hp->hp_child = hp_hdl->cn_info.cn_child;
3586 3589 list_insert_tail(&st->hp_list, hp);
3587 3590
3588 3591 off += sizeof (struct di_hp);
3589 3592
3590 3593 /* Add name of this di_hp_t to the snapshot */
3591 3594 if (hp_hdl->cn_info.cn_name) {
3592 3595 size = strlen(hp_hdl->cn_info.cn_name) + 1;
3593 3596 me->hp_name = off = di_checkmem(st, off, size);
3594 3597 (void) strcpy(di_mem_addr(st, off),
3595 3598 hp_hdl->cn_info.cn_name);
3596 3599 off += size;
3597 3600 }
3598 3601
3599 3602 /* Add type description of this di_hp_t to the snapshot */
3600 3603 if (hp_hdl->cn_info.cn_type_str) {
3601 3604 size = strlen(hp_hdl->cn_info.cn_type_str) + 1;
3602 3605 me->hp_type_str = off = di_checkmem(st, off, size);
3603 3606 (void) strcpy(di_mem_addr(st, off),
3604 3607 hp_hdl->cn_info.cn_type_str);
3605 3608 off += size;
3606 3609 }
3607 3610
3608 3611 /*
3609 3612 * Set link to next in the chain of di_hp_t nodes,
3610 3613 * or terminate the chain when processing the last node.
3611 3614 */
3612 3615 if (hp_hdl->next != NULL) {
3613 3616 off = di_checkmem(st, off, sizeof (struct di_hp));
3614 3617 me->next = off;
3615 3618 } else {
3616 3619 me->next = 0;
3617 3620 }
3618 3621
3619 3622 /* Update pointer to next in the chain */
3620 3623 hp_hdl = hp_hdl->next;
3621 3624
3622 3625 } while (hp_hdl);
3623 3626
3624 3627 return (off);
3625 3628 }
3626 3629
3627 3630 /*
3628 3631 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3629 3632 * This function encapsulates the state machine:
3630 3633 *
3631 3634 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3632 3635 * |          SNAPSHOT                  USRLD      |
3633 3636 * --------------------------------------------------
3634 3637 *
3635 3638 * Returns 0 on success and -1 on failure
3636 3639 */
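/*
 * A sketch of how a snapshot pass is expected to drive this, with
 * error handling abbreviated:
 *
 *	if (di_setstate(st, IOC_SNAP) == -1)
 *		return (EBUSY);
 *	(take the snapshot)
 *	(void) di_setstate(st, IOC_DONE);
 *
 * A later DINFOUSRLD pass then moves DONE -> COPY and back to IDLE
 * once the copyout completes.
 */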
3637 3640 static int
3638 3641 di_setstate(struct di_state *st, int new_state)
3639 3642 {
3640 3643 int ret = 0;
3641 3644
3642 3645 mutex_enter(&di_lock);
3643 3646 switch (new_state) {
3644 3647 case IOC_IDLE:
3645 3648 case IOC_DONE:
3646 3649 break;
3647 3650 case IOC_SNAP:
3648 3651 if (st->di_iocstate != IOC_IDLE)
3649 3652 ret = -1;
3650 3653 break;
3651 3654 case IOC_COPY:
3652 3655 if (st->di_iocstate != IOC_DONE)
3653 3656 ret = -1;
3654 3657 break;
3655 3658 default:
3656 3659 ret = -1;
3657 3660 }
3658 3661
3659 3662 if (ret == 0)
3660 3663 st->di_iocstate = new_state;
3661 3664 else
3662 3665 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3663 3666 st->di_iocstate, new_state);
3664 3667 mutex_exit(&di_lock);
3665 3668 return (ret);
3666 3669 }
3667 3670
3668 3671 /*
3669 3672 * We cannot assume the presence of the entire
3670 3673 * snapshot in this routine. All we are guaranteed
3671 3674 * is the di_all struct + 1 byte (for root_path)
3672 3675 */
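/*
 * (The extra byte covers root_path[1]: the checks below examine the
 * two characters of the root path, "/" and its terminating NUL.)
 */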
3673 3676 static int
3674 3677 header_plus_one_ok(struct di_all *all)
3675 3678 {
3676 3679 /*
3677 3680 * Refuse to read old versions
3678 3681 */
3679 3682 if (all->version != DI_SNAPSHOT_VERSION) {
3680 3683 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
3681 3684 return (0);
3682 3685 }
3683 3686
3684 3687 if (all->cache_magic != DI_CACHE_MAGIC) {
3685 3688 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
3686 3689 return (0);
3687 3690 }
3688 3691
3689 3692 if (all->snapshot_time == 0) {
3690 3693 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
3691 3694 return (0);
3692 3695 }
3693 3696
3694 3697 if (all->top_devinfo == 0) {
3695 3698 CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
3696 3699 return (0);
3697 3700 }
3698 3701
3699 3702 if (all->map_size < sizeof (*all) + 1) {
3700 3703 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
3701 3704 return (0);
3702 3705 }
3703 3706
3704 3707 if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
3705 3708 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
3706 3709 all->root_path[0], all->root_path[1]));
3707 3710 return (0);
3708 3711 }
3709 3712
3710 3713 /*
3711 3714 * We can't check checksum here as we just have the header
3712 3715 */
3713 3716
3714 3717 return (1);
3715 3718 }
3716 3719
3717 3720 static int
3718 3721 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
3719 3722 {
3720 3723 rlim64_t rlimit;
3721 3724 ssize_t resid;
3722 3725 int error = 0;
3723 3726
3724 3727
3725 3728 rlimit = RLIM64_INFINITY;
3726 3729
3727 3730 while (len) {
3728 3731 resid = 0;
3729 3732 error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
3730 3733 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
3731 3734
3732 3735 if (error || resid < 0) {
3733 3736 error = error ? error : EIO;
3734 3737 CACHE_DEBUG((DI_ERR, "write error: %d", error));
3735 3738 break;
3736 3739 }
3737 3740
3738 3741 /*
3739 3742 * Check if we are making progress
3740 3743 */
3741 3744 if (resid >= len) {
3742 3745 error = ENOSPC;
3743 3746 break;
3744 3747 }
3745 3748 buf += len - resid;
3746 3749 off += len - resid;
3747 3750 len = resid;
3748 3751 }
3749 3752
3750 3753 return (error);
3751 3754 }
3752 3755
3753 3756 static void
3754 3757 di_cache_write(struct di_cache *cache)
3755 3758 {
3756 3759 struct di_all *all;
3757 3760 struct vnode *vp;
3758 3761 int oflags;
3759 3762 size_t map_size;
3760 3763 size_t chunk;
3761 3764 offset_t off;
3762 3765 int error;
3763 3766 char *buf;
3764 3767
3765 3768 ASSERT(DI_CACHE_LOCKED(*cache));
3766 3769 ASSERT(!servicing_interrupt());
3767 3770
3768 3771 if (cache->cache_size == 0) {
3769 3772 ASSERT(cache->cache_data == NULL);
3770 3773 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
3771 3774 return;
3772 3775 }
3773 3776
3774 3777 ASSERT(cache->cache_size > 0);
3775 3778 ASSERT(cache->cache_data);
3776 3779
3777 3780 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
3778 3781 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
3779 3782 return;
3780 3783 }
3781 3784
3782 3785 all = (struct di_all *)cache->cache_data;
3783 3786
3784 3787 if (!header_plus_one_ok(all)) {
3785 3788 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
3786 3789 return;
3787 3790 }
3788 3791
3789 3792 ASSERT(strcmp(all->root_path, "/") == 0);
3790 3793
3791 3794 /*
3792 3795 * The cache_size is the total allocated memory for the cache.
3793 3796 * The map_size is the actual size of valid data in the cache.
3794 3797 * map_size may be smaller than cache_size but cannot exceed
3795 3798 * cache_size.
3796 3799 */
3797 3800 if (all->map_size > cache->cache_size) {
3798 3801 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
3799 3802 " Skipping write", all->map_size, cache->cache_size));
3800 3803 return;
3801 3804 }
3802 3805
3803 3806 /*
3804 3807 * First unlink the temp file
3805 3808 */
3806 3809 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
3807 3810 if (error && error != ENOENT) {
3808 3811 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
3809 3812 DI_CACHE_TEMP, error));
3810 3813 }
3811 3814
3812 3815 if (error == EROFS) {
3813 3816 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
3814 3817 return;
3815 3818 }
3816 3819
3817 3820 vp = NULL;
3818 3821 oflags = (FCREAT|FWRITE);
3819 3822 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
3820 3823 DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
3821 3824 CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
3822 3825 DI_CACHE_TEMP, error));
3823 3826 return;
3824 3827 }
3825 3828
3826 3829 ASSERT(vp);
3827 3830
3828 3831 /*
3829 3832 * Paranoid: Check if the file is on a read-only FS
3830 3833 */
3831 3834 if (vn_is_readonly(vp)) {
3832 3835 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
3833 3836 goto fail;
3834 3837 }
3835 3838
3836 3839 /*
3837 3840 * Note that we only write map_size bytes to disk - this saves
3838 3841 * space as the actual cache size may be larger than size of
3839 3842 * valid data in the cache.
3840 3843 * Another advantage is that it makes verification of size
3841 3844 * easier when the file is read later.
3842 3845 */
3843 3846 map_size = all->map_size;
3844 3847 off = 0;
3845 3848 buf = cache->cache_data;
3846 3849
3847 3850 while (map_size) {
3848 3851 ASSERT(map_size > 0);
3849 3852 /*
3850 3853 * Write in chunks so that VM system
3851 3854 * is not overwhelmed
3852 3855 */
3853 3856 if (map_size > di_chunk * PAGESIZE)
3854 3857 chunk = di_chunk * PAGESIZE;
3855 3858 else
3856 3859 chunk = map_size;
3857 3860
3858 3861 error = chunk_write(vp, off, buf, chunk);
3859 3862 if (error) {
3860 3863 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
3861 3864 off, error));
3862 3865 goto fail;
3863 3866 }
3864 3867
3865 3868 off += chunk;
3866 3869 buf += chunk;
3867 3870 map_size -= chunk;
3868 3871
3869 3872 /* If low on memory, give pageout a chance to run */
3870 3873 if (freemem < desfree)
3871 3874 delay(1);
3872 3875 }
3873 3876
3874 3877 /*
3875 3878 * Now sync the file and close it
3876 3879 */
3877 3880 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) {
3878 3881 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
3879 3882 }
3880 3883
3881 3884 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) {
3882 3885 CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
3883 3886 VN_RELE(vp);
3884 3887 return;
3885 3888 }
3886 3889
3887 3890 VN_RELE(vp);
3888 3891
3889 3892 /*
3890 3893 * Now do the rename
3891 3894 */
3892 3895 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
3893 3896 CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
3894 3897 return;
3895 3898 }
3896 3899
3897 3900 CACHE_DEBUG((DI_INFO, "Cache write successful."));
3898 3901
3899 3902 return;
3900 3903
3901 3904 fail:
3902 3905 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL);
3903 3906 VN_RELE(vp);
3904 3907 }
3905 3908
3906 3909
3907 3910 /*
3908 3911 * Since we could be called early in boot,
3909 3912 * use kobj_read_file()
3910 3913 */
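/*
 * The kobj interfaces work before the root filesystem is mounted
 * read/write; the read pattern below boils down to, with error
 * handling elided:
 *
 *	struct _buf *file = kobj_open_file(DI_CACHE_FILE);
 *	if (file != (struct _buf *)-1) {
 *		n = kobj_read_file(file, buf, size, offset);
 *		kobj_close_file(file);
 *	}
 */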
3911 3914 static void
3912 3915 di_cache_read(struct di_cache *cache)
3913 3916 {
3914 3917 struct _buf *file;
3915 3918 struct di_all *all;
3916 3919 int n;
3917 3920 size_t map_size, sz, chunk;
3918 3921 offset_t off;
3919 3922 caddr_t buf;
3920 3923 uint32_t saved_crc, crc;
3921 3924
3922 3925 ASSERT(modrootloaded);
3923 3926 ASSERT(DI_CACHE_LOCKED(*cache));
3924 3927 ASSERT(cache->cache_data == NULL);
3925 3928 ASSERT(cache->cache_size == 0);
3926 3929 ASSERT(!servicing_interrupt());
3927 3930
3928 3931 file = kobj_open_file(DI_CACHE_FILE);
3929 3932 if (file == (struct _buf *)-1) {
3930 3933 CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
3931 3934 DI_CACHE_FILE, ENOENT));
3932 3935 return;
3933 3936 }
3934 3937
3935 3938 /*
3936 3939 * Read in the header+root_path first. The root_path must be "/"
3937 3940 */
3938 3941 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
3939 3942 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);
3940 3943
3941 3944 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
3942 3945 kmem_free(all, sizeof (*all) + 1);
3943 3946 kobj_close_file(file);
3944 3947 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
3945 3948 return;
3946 3949 }
3947 3950
3948 3951 map_size = all->map_size;
3949 3952
3950 3953 kmem_free(all, sizeof (*all) + 1);
3951 3954
3952 3955 ASSERT(map_size >= sizeof (*all) + 1);
3953 3956
3954 3957 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
3955 3958 sz = map_size;
3956 3959 off = 0;
3957 3960 while (sz) {
3958 3961 /* Don't overload VM with large reads */
3959 3962 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
3960 3963 n = kobj_read_file(file, buf, chunk, off);
3961 3964 if (n != chunk) {
3962 3965 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
3963 3966 DI_CACHE_FILE, off));
3964 3967 goto fail;
3965 3968 }
3966 3969 off += chunk;
3967 3970 buf += chunk;
3968 3971 sz -= chunk;
3969 3972 }
3970 3973
3971 3974 ASSERT(off == map_size);
3972 3975
3973 3976 /*
3974 3977 * Read past expected EOF to verify size.
3975 3978 */
3976 3979 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
3977 3980 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
3978 3981 goto fail;
3979 3982 }
3980 3983
3981 3984 all = (struct di_all *)di_cache.cache_data;
3982 3985 if (!header_plus_one_ok(all)) {
3983 3986 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
3984 3987 goto fail;
3985 3988 }
3986 3989
3987 3990 /*
3988 3991 * Compute CRC with checksum field in the cache data set to 0
3989 3992 */
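	/*
	 * (di_cache_update() computed the stored checksum while this
	 * field was still zero, so it must be zeroed again here to
	 * reproduce the same CRC. CRC32() is the table-driven macro
	 * from <sys/crc32.h>; the -1U argument is the initial seed.)
	 */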
3990 3993 saved_crc = all->cache_checksum;
3991 3994 all->cache_checksum = 0;
3992 3995 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
3993 3996 all->cache_checksum = saved_crc;
3994 3997
3995 3998 if (crc != all->cache_checksum) {
3996 3999 CACHE_DEBUG((DI_ERR,
3997 4000 "%s: checksum error: expected=0x%x actual=0x%x",
3998 4001 DI_CACHE_FILE, all->cache_checksum, crc));
3999 4002 goto fail;
4000 4003 }
4001 4004
4002 4005 if (all->map_size != map_size) {
4003 4006 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
4004 4007 goto fail;
4005 4008 }
4006 4009
4007 4010 kobj_close_file(file);
4008 4011
4009 4012 di_cache.cache_size = map_size;
4010 4013
4011 4014 return;
4012 4015
4013 4016 fail:
4014 4017 kmem_free(di_cache.cache_data, map_size);
4015 4018 kobj_close_file(file);
4016 4019 di_cache.cache_data = NULL;
4017 4020 di_cache.cache_size = 0;
4018 4021 }
4019 4022
4020 4023
4021 4024 /*
4022 4025 * Checks if arguments are valid for using the cache.
4023 4026 */
4024 4027 static int
4025 4028 cache_args_valid(struct di_state *st, int *error)
4026 4029 {
4027 4030 ASSERT(error);
4028 4031 ASSERT(st->mem_size > 0);
4029 4032 ASSERT(st->memlist != NULL);
4030 4033
4031 4034 if (!modrootloaded || !i_ddi_io_initialized()) {
4032 4035 CACHE_DEBUG((DI_ERR,
4033 4036 "cache lookup failure: I/O subsystem not inited"));
4034 4037 *error = ENOTACTIVE;
4035 4038 return (0);
4036 4039 }
4037 4040
4038 4041 /*
4039 4042 * No other flags allowed with DINFOCACHE
4040 4043 */
4041 4044 if (st->command != (DINFOCACHE & DIIOC_MASK)) {
4042 4045 CACHE_DEBUG((DI_ERR,
4043 4046 "cache lookup failure: bad flags: 0x%x",
4044 4047 st->command));
4045 4048 *error = EINVAL;
4046 4049 return (0);
4047 4050 }
4048 4051
4049 4052 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4050 4053 CACHE_DEBUG((DI_ERR,
4051 4054 "cache lookup failure: bad root: %s",
4052 4055 DI_ALL_PTR(st)->root_path));
4053 4056 *error = EINVAL;
4054 4057 return (0);
4055 4058 }
4056 4059
4057 4060 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));
4058 4061
4059 4062 *error = 0;
4060 4063
4061 4064 return (1);
4062 4065 }
4063 4066
4064 4067 static int
4065 4068 snapshot_is_cacheable(struct di_state *st)
4066 4069 {
4067 4070 ASSERT(st->mem_size > 0);
4068 4071 ASSERT(st->memlist != NULL);
4069 4072
4070 4073 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
4071 4074 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
4072 4075 CACHE_DEBUG((DI_INFO,
4073 4076 "not cacheable: incompatible flags: 0x%x",
4074 4077 st->command));
4075 4078 return (0);
4076 4079 }
4077 4080
4078 4081 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4079 4082 CACHE_DEBUG((DI_INFO,
4080 4083 "not cacheable: incompatible root path: %s",
4081 4084 DI_ALL_PTR(st)->root_path));
4082 4085 return (0);
4083 4086 }
4084 4087
4085 4088 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));
4086 4089
4087 4090 return (1);
4088 4091 }
4089 4092
4090 4093 static int
4091 4094 di_cache_lookup(struct di_state *st)
4092 4095 {
4093 4096 size_t rval;
4094 4097 int cache_valid;
4095 4098
4096 4099 ASSERT(cache_args_valid(st, &cache_valid));
4097 4100 ASSERT(modrootloaded);
4098 4101
4099 4102 DI_CACHE_LOCK(di_cache);
4100 4103
4101 4104 /*
4102 4105 * The following assignment determines the validity
4103 4106 * of the cache as far as this snapshot is concerned.
4104 4107 */
4105 4108 cache_valid = di_cache.cache_valid;
4106 4109
4107 4110 if (cache_valid && di_cache.cache_data == NULL) {
4108 4111 di_cache_read(&di_cache);
4109 4112 /* check for read or file error */
4110 4113 if (di_cache.cache_data == NULL)
4111 4114 cache_valid = 0;
4112 4115 }
4113 4116
4114 4117 if (cache_valid) {
4115 4118 /*
4116 4119 * Ok, the cache was valid as of this particular
4117 4120 * snapshot. Copy the cached snapshot. This is safe
4118 4121 * to do as the cache cannot be freed (we hold the
4119 4122 * cache lock). Free the memory allocated in di_state
4120 4123 * up until this point - we will simply copy everything
4121 4124 * in the cache.
4122 4125 */
4123 4126
4124 4127 ASSERT(di_cache.cache_data != NULL);
4125 4128 ASSERT(di_cache.cache_size > 0);
4126 4129
4127 4130 di_freemem(st);
4128 4131
4129 4132 rval = 0;
4130 4133 if (di_cache2mem(&di_cache, st) > 0) {
4131 4134 /*
4132 4135 * map_size is size of valid data in the
4133 4136 * cached snapshot and may be less than
4134 4137 * size of the cache.
4135 4138 */
4136 4139 ASSERT(DI_ALL_PTR(st));
4137 4140 rval = DI_ALL_PTR(st)->map_size;
4138 4141
4139 4142 ASSERT(rval >= sizeof (struct di_all));
4140 4143 ASSERT(rval <= di_cache.cache_size);
4141 4144 }
4142 4145 } else {
4143 4146 /*
4144 4147 * The cache isn't valid, so we need to take a snapshot.
4145 4148 * Set the command flags appropriately.
4146 4149 */
4147 4150 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
4148 4151 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
4149 4152 rval = di_cache_update(st);
4150 4153 st->command = (DINFOCACHE & DIIOC_MASK);
4151 4154 }
4152 4155
4153 4156 DI_CACHE_UNLOCK(di_cache);
4154 4157
4155 4158 /*
4156 4159 * For cached snapshots, the devinfo driver always returns
4157 4160 * a snapshot rooted at "/".
4158 4161 */
4159 4162 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);
4160 4163
4161 4164 return ((int)rval);
4162 4165 }
4163 4166
4164 4167 /*
4165 4168 * This is a forced update of the cache - the previous state of the cache
4166 4169 * may be:
4167 4170 * - unpopulated
4168 4171 * - populated and invalid
4169 4172 * - populated and valid
4170 4173 */
4171 4174 static int
4172 4175 di_cache_update(struct di_state *st)
4173 4176 {
4174 4177 int rval;
4175 4178 uint32_t crc;
4176 4179 struct di_all *all;
4177 4180
4178 4181 ASSERT(DI_CACHE_LOCKED(di_cache));
4179 4182 ASSERT(snapshot_is_cacheable(st));
4180 4183
4181 4184 /*
4182 4185 * Free the in-core cache and the on-disk file (if they exist)
4183 4186 */
4184 4187 i_ddi_di_cache_free(&di_cache);
4185 4188
4186 4189 /*
4187 4190 * Set the valid flag before taking the snapshot, so
4188 4191 * that any invalidation that arrives during or after
4189 4192 * the snapshot takes effect and is not overwritten
4190 4193 * by us.
4191 4194 */
4192 4195 atomic_or_32(&di_cache.cache_valid, 1);
4193 4196
4194 4197 rval = di_snapshot_and_clean(st);
4195 4198
4196 4199 if (rval == 0) {
4197 4200 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
4198 4201 return (0);
4199 4202 }
4200 4203
4201 4204 DI_ALL_PTR(st)->map_size = rval;
4202 4205 if (di_mem2cache(st, &di_cache) == 0) {
4203 4206 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
4204 4207 return (0);
4205 4208 }
4206 4209
4207 4210 ASSERT(di_cache.cache_data);
4208 4211 ASSERT(di_cache.cache_size > 0);
4209 4212
4210 4213 /*
4211 4214 * Now that we have cached the snapshot, compute its checksum.
4212 4215 * The checksum is only computed over the valid data in the
4213 4216 * cache, not the entire cache.
4214 4217 * Also, set all the fields (except checksum) before computing
4215 4218 * checksum.
4216 4219 */
4217 4220 all = (struct di_all *)di_cache.cache_data;
4218 4221 all->cache_magic = DI_CACHE_MAGIC;
4219 4222 all->map_size = rval;
4220 4223
4221 4224 ASSERT(all->cache_checksum == 0);
4222 4225 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
4223 4226 all->cache_checksum = crc;
4224 4227
4225 4228 di_cache_write(&di_cache);
4226 4229
4227 4230 return (rval);
4228 4231 }
4229 4232
4230 4233 static void
4231 4234 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
4232 4235 {
4233 4236 va_list ap;
4234 4237
4235 4238 if (di_cache_debug <= DI_QUIET)
4236 4239 return;
4237 4240
4238 4241 if (di_cache_debug < msglevel)
4239 4242 return;
4240 4243
4241 4244 switch (msglevel) {
4242 4245 case DI_ERR:
4243 4246 msglevel = CE_WARN;
4244 4247 break;
4245 4248 case DI_INFO:
4246 4249 case DI_TRACE:
4247 4250 default:
4248 4251 msglevel = CE_NOTE;
4249 4252 break;
4250 4253 }
4251 4254
4252 4255 va_start(ap, fmt);
4253 4256 vcmn_err(msglevel, fmt, ap);
4254 4257 va_end(ap);
4255 4258 }
4256 4259
4257 4260 static void
4258 4261 di_hotplug_children(struct di_state *st)
4259 4262 {
4260 4263 di_off_t off;
4261 4264 struct di_hp *hp;
4262 4265 struct i_hp *hp_list_node;
4263 4266
4264 4267 while (hp_list_node = (struct i_hp *)list_remove_head(&st->hp_list)) {
4265 4268
4266 4269 if ((hp_list_node->hp_child != NULL) &&
4267 4270 (di_dip_find(st, hp_list_node->hp_child, &off) == 0)) {
4268 4271 hp = DI_HP(di_mem_addr(st, hp_list_node->hp_off));
4269 4272 hp->hp_child = off;
4270 4273 }
4271 4274
4272 4275 kmem_free(hp_list_node, sizeof (i_hp_t));
4273 4276 }
4274 4277
4275 4278 list_destroy(&st->hp_list);
4276 4279 }
(3660 lines elided)