1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * driver for accessing kernel devinfo tree.
  28  */
  29 #include <sys/types.h>
  30 #include <sys/pathname.h>
  31 #include <sys/debug.h>
  32 #include <sys/autoconf.h>
  33 #include <sys/vmsystm.h>
  34 #include <sys/conf.h>
  35 #include <sys/file.h>
  36 #include <sys/kmem.h>
  37 #include <sys/modctl.h>
  38 #include <sys/stat.h>
  39 #include <sys/ddi.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/sunldi_impl.h>
  42 #include <sys/sunndi.h>
  43 #include <sys/esunddi.h>
  44 #include <sys/sunmdi.h>
  45 #include <sys/ddi_impldefs.h>
  46 #include <sys/ndi_impldefs.h>
  47 #include <sys/mdi_impldefs.h>
  48 #include <sys/devinfo_impl.h>
  49 #include <sys/thread.h>
  50 #include <sys/modhash.h>
  51 #include <sys/bitmap.h>
  52 #include <util/qsort.h>
  53 #include <sys/disp.h>
  54 #include <sys/kobj.h>
  55 #include <sys/crc32.h>
  56 #include <sys/ddi_hp.h>
  57 #include <sys/ddi_hp_impl.h>
  58 #include <sys/sysmacros.h>
  59 #include <sys/list.h>
  60 
  61 
#ifdef DEBUG
/* Debug verbosity knob: levels 1..3 enable dcmn_err, dcmn_err2, dcmn_err3. */
static int di_debug;
#define dcmn_err(args) if (di_debug >= 1) cmn_err args
#define dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define dcmn_err(args) /* nothing */
#define dcmn_err2(args) /* nothing */
#define dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 * di_max_opens is the total number of di_states[] slots (see di_open()).
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;		/* enable dynamic property support */

#define DI_FULL_PARENT		0	/* minor # of the full node */
#define DI_READONLY_PARENT	1	/* minor # of the read-only node */
#define DI_NODE_SPECIES		2	/* stride between instances of a species */
#define DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

/* States of the snapshot ioctl state machine (di_iocstate; see di_setstate) */
#define IOC_IDLE	0	/* snapshot ioctl states */
#define IOC_SNAP	1	/* snapshot in progress */
#define IOC_DONE	2	/* snapshot done, but not copied out */
#define IOC_COPY	3	/* copyout in progress */
  92 /*
  93  * Keep max alignment so we can move snapshot to different platforms.
  94  *
  95  * NOTE: Most callers should rely on the di_checkmem return value
  96  * being aligned, and reestablish *off_p with aligned value, instead
  97  * of trying to align size of their allocations: this approach will
  98  * minimize memory use.
  99  */
 100 #define DI_ALIGN(addr)  ((addr + 7l) & ~7l)
 101 
 102 /*
 103  * To avoid wasting memory, make a linked list of memory chunks.
 104  * Size of each chunk is buf_size.
 105  */
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.  Chunks are allocated by di_allocmem()
 * and the whole list is released by di_freemem().
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};
 112 
 113 /*
 114  * This is a stack for walking the tree without using recursion.
 115  * When the devinfo tree height is above some small size, one
 116  * gets watchdog resets on sun4m.
 117  */
 118 struct di_stack {
 119         void            *offset[MAX_TREE_DEPTH];
 120         struct dev_info *dip[MAX_TREE_DEPTH];
 121         int             circ[MAX_TREE_DEPTH];
 122         int             depth;  /* depth of current node to be copied */
 123 };
 124 
 125 #define TOP_OFFSET(stack)       \
 126         ((di_off_t *)(stack)->offset[(stack)->depth - 1])
 127 #define TOP_NODE(stack)         \
 128         ((stack)->dip[(stack)->depth - 1])
 129 #define PARENT_OFFSET(stack)    \
 130         ((di_off_t *)(stack)->offset[(stack)->depth - 2])
 131 #define EMPTY_STACK(stack)      ((stack)->depth == 0)
 132 #define POP_STACK(stack)        { \
 133         ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
 134                 (stack)->circ[(stack)->depth - 1]); \
 135         ((stack)->depth--); \
 136 }
 137 #define PUSH_STACK(stack, node, off_p)  { \
 138         ASSERT(node != NULL); \
 139         ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
 140         (stack)->dip[(stack)->depth] = (node); \
 141         (stack)->offset[(stack)->depth] = (void *)(off_p); \
 142         ((stack)->depth)++; \
 143 }
 144 
 145 #define DI_ALL_PTR(s)   DI_ALL(di_mem_addr((s), 0))
 146 
 147 /*
 148  * With devfs, the device tree has no global locks. The device tree is
 149  * dynamic and dips may come and go if they are not locked locally. Under
 150  * these conditions, pointers are no longer reliable as unique IDs.
 151  * Specifically, these pointers cannot be used as keys for hash tables
 152  * as the same devinfo structure may be freed in one part of the tree only
 153  * to be allocated as the structure for a different device in another
 154  * part of the tree. This can happen if DR and the snapshot are
 155  * happening concurrently.
 156  * The following data structures act as keys for devinfo nodes and
 157  * pathinfo nodes.
 158  */
 159 
 160 enum di_ktype {
 161         DI_DKEY = 1,
 162         DI_PKEY = 2
 163 };
 164 
 165 struct di_dkey {
 166         dev_info_t      *dk_dip;
 167         major_t         dk_major;
 168         int             dk_inst;
 169         pnode_t         dk_nodeid;
 170 };
 171 
 172 struct di_pkey {
 173         mdi_pathinfo_t  *pk_pip;
 174         char            *pk_path_addr;
 175         dev_info_t      *pk_client;
 176         dev_info_t      *pk_phci;
 177 };
 178 
 179 struct di_key {
 180         enum di_ktype   k_type;
 181         union {
 182                 struct di_dkey dkey;
 183                 struct di_pkey pkey;
 184         } k_u;
 185 };
 186 
 187 
struct i_lnode;

/*
 * In-kernel representation of a link (LDI usage edge) between two
 * lnode endpoints, built while assembling the snapshot.
 */
typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link /w same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link /w same i_lnode */
} i_link_t;

/*
 * In-kernel representation of a link endpoint (device node + devt),
 * with the lists of links for which it is source or target.
 */
typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;

/*
 * Bookkeeping for a hotplug connection that still needs its child
 * devinfo reference resolved; queued on di_state.hp_list.
 */
typedef struct i_hp {
	di_off_t	hp_off;		/* Offset of di_hp_t in snapshot */
	dev_info_t	*hp_child;	/* Child devinfo node of the di_hp_t */
	list_node_t	hp_link;	/* List linkage */
} i_hp_t;
 242 
 243 /*
 244  * Soft state associated with each instance of driver open.
 245  */
 246 static struct di_state {
 247         di_off_t        mem_size;       /* total # bytes in memlist */
 248         struct di_mem   *memlist;       /* head of memlist */
 249         uint_t          command;        /* command from ioctl */
 250         int             di_iocstate;    /* snapshot ioctl state */
 251         mod_hash_t      *reg_dip_hash;
 252         mod_hash_t      *reg_pip_hash;
 253         int             lnode_count;
 254         int             link_count;
 255 
 256         mod_hash_t      *lnode_hash;
 257         mod_hash_t      *link_hash;
 258 
 259         list_t          hp_list;
 260 } **di_states;
 261 
 262 static kmutex_t di_lock;        /* serialize instance assignment */
 263 
 264 typedef enum {
 265         DI_QUIET = 0,   /* DI_QUIET must always be 0 */
 266         DI_ERR,
 267         DI_INFO,
 268         DI_TRACE,
 269         DI_TRACE1,
 270         DI_TRACE2
 271 } di_cache_debug_t;
 272 
 273 static uint_t   di_chunk = 32;          /* I/O chunk size in pages */
 274 
 275 #define DI_CACHE_LOCK(c)        (mutex_enter(&(c).cache_lock))
 276 #define DI_CACHE_UNLOCK(c)      (mutex_exit(&(c).cache_lock))
 277 #define DI_CACHE_LOCKED(c)      (mutex_owned(&(c).cache_lock))
 278 
 279 /*
 280  * Check that whole device tree is being configured as a pre-condition for
 281  * cleaning up /etc/devices files.
 282  */
 283 #define DEVICES_FILES_CLEANABLE(st)     \
 284         (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
 285         strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)
 286 
 287 #define CACHE_DEBUG(args)       \
 288         { if (di_cache_debug != DI_QUIET) di_cache_print args; }
 289 
 290 typedef struct phci_walk_arg {
 291         di_off_t        off;
 292         struct di_state *st;
 293 } phci_walk_arg_t;
 294 
/* cb_ops / dev_ops entry points */
static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

/* snapshot assembly: tree walking and per-node data copiers */
static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_gethpdata(ddi_hp_cn_handle_t *, di_off_t *,
    struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);

/* snapshot memory management and state tracking */
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

/* snapshot cache support */
static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);
static void di_hotplug_children(struct di_state *st);

extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
	int (*)(dev_info_t *, void *), void *);
 342 
 343 
/*
 * Character device entry points.  Only open/close/ioctl are implemented;
 * the snapshot is transferred to userland via the DINFOUSRLD ioctl, not
 * read(2) or mmap(2).
 */
static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
 361 
/* Device operations vector; this is a leaf pseudo driver (no bus ops). */
static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,	    /* driver operations */
	NULL			/* bus operations */
};
 374 
 375 /*
 376  * Module linkage information for the kernel.
 377  */
 378 static struct modldrv modldrv = {
 379         &mod_driverops,
 380         "DEVINFO Driver",
 381         &di_ops
 382 };
 383 
 384 static struct modlinkage modlinkage = {
 385         MODREV_1,
 386         &modldrv,
 387         NULL
 388 };
 389 
 390 int
 391 _init(void)
 392 {
 393         int     error;
 394 
 395         mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
 396 
 397         error = mod_install(&modlinkage);
 398         if (error != 0) {
 399                 mutex_destroy(&di_lock);
 400                 return (error);
 401         }
 402 
 403         return (0);
 404 }
 405 
/* Module entry: report module information via the common mod_info(). */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
 411 
 412 int
 413 _fini(void)
 414 {
 415         int     error;
 416 
 417         error = mod_remove(&modlinkage);
 418         if (error != 0) {
 419                 return (error);
 420         }
 421 
 422         mutex_destroy(&di_lock);
 423         return (0);
 424 }
 425 
 426 static dev_info_t *di_dip;
 427 
 428 /*ARGSUSED*/
 429 static int
 430 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 431 {
 432         int     error = DDI_FAILURE;
 433 
 434         switch (infocmd) {
 435         case DDI_INFO_DEVT2DEVINFO:
 436                 *result = (void *)di_dip;
 437                 error = DDI_SUCCESS;
 438                 break;
 439         case DDI_INFO_DEVT2INSTANCE:
 440                 /*
 441                  * All dev_t's map to the same, single instance.
 442                  */
 443                 *result = (void *)0;
 444                 error = DDI_SUCCESS;
 445                 break;
 446         default:
 447                 break;
 448         }
 449 
 450         return (error);
 451 }
 452 
 453 static int
 454 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 455 {
 456         int     error = DDI_FAILURE;
 457 
 458         switch (cmd) {
 459         case DDI_ATTACH:
 460                 di_states = kmem_zalloc(
 461                     di_max_opens * sizeof (struct di_state *), KM_SLEEP);
 462 
 463                 if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
 464                     DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
 465                     ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
 466                     DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 467                         kmem_free(di_states,
 468                             di_max_opens * sizeof (struct di_state *));
 469                         ddi_remove_minor_node(dip, NULL);
 470                         error = DDI_FAILURE;
 471                 } else {
 472                         di_dip = dip;
 473                         ddi_report_dev(dip);
 474 
 475                         error = DDI_SUCCESS;
 476                 }
 477                 break;
 478         default:
 479                 error = DDI_FAILURE;
 480                 break;
 481         }
 482 
 483         return (error);
 484 }
 485 
 486 static int
 487 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 488 {
 489         int     error = DDI_FAILURE;
 490 
 491         switch (cmd) {
 492         case DDI_DETACH:
 493                 ddi_remove_minor_node(dip, NULL);
 494                 di_dip = NULL;
 495                 kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
 496 
 497                 error = DDI_SUCCESS;
 498                 break;
 499         default:
 500                 error = DDI_FAILURE;
 501                 break;
 502         }
 503 
 504         return (error);
 505 }
 506 
 507 /*
 508  * Allow multiple opens by tweaking the dev_t such that it looks like each
 509  * open is getting a different minor device.  Each minor gets a separate
 510  * entry in the di_states[] table.  Based on the original minor number, we
 511  * discriminate opens of the full and read-only nodes.  If all of the instances
 512  * of the selected minor node are currently open, we return EAGAIN.
 513  */
 514 /*ARGSUSED*/
 515 static int
 516 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
 517 {
 518         int     m;
 519         minor_t minor_parent = getminor(*devp);
 520 
 521         if (minor_parent != DI_FULL_PARENT &&
 522             minor_parent != DI_READONLY_PARENT)
 523                 return (ENXIO);
 524 
 525         mutex_enter(&di_lock);
 526 
 527         for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
 528                 if (di_states[m] != NULL)
 529                         continue;
 530 
 531                 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
 532                 break;  /* It's ours. */
 533         }
 534 
 535         if (m >= di_max_opens) {
 536                 /*
 537                  * maximum open instance for device reached
 538                  */
 539                 mutex_exit(&di_lock);
 540                 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
 541                 return (EAGAIN);
 542         }
 543         mutex_exit(&di_lock);
 544 
 545         ASSERT(m < di_max_opens);
 546         *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
 547 
 548         dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
 549             (void *)curthread, m + DI_NODE_SPECIES));
 550 
 551         return (0);
 552 }
 553 
 554 /*ARGSUSED*/
 555 static int
 556 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
 557 {
 558         struct di_state *st;
 559         int             m = (int)getminor(dev) - DI_NODE_SPECIES;
 560 
 561         if (m < 0 || m >= di_max_opens) {
 562                 cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
 563                     m + DI_NODE_SPECIES);
 564                 return (ENXIO);
 565         }
 566 
 567         /*
 568          * empty slot in state table
 569          */
 570         mutex_enter(&di_lock);
 571         st = di_states[m];
 572         di_states[m] = NULL;
 573         mutex_exit(&di_lock);
 574 
 575         if (st != NULL) {
 576                 di_freemem(st);
 577                 kmem_free(st, sizeof (struct di_state));
 578         }
 579 
 580         dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
 581             (void *)curthread, m + DI_NODE_SPECIES));
 582 
 583         return (0);
 584 }
 585 
 586 
 587 /*ARGSUSED*/
 588 static int
 589 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
 590 {
 591         int             rv, error;
 592         di_off_t        off;
 593         struct di_all   *all;
 594         struct di_state *st;
 595         int             m = (int)getminor(dev) - DI_NODE_SPECIES;
 596         major_t         i;
 597         char            *drv_name;
 598         size_t          map_size, size;
 599         struct di_mem   *dcp;
 600         int             ndi_flags;
 601 
 602         if (m < 0 || m >= di_max_opens) {
 603                 return (ENXIO);
 604         }
 605 
 606         st = di_states[m];
 607         if(st == NULL) {
 608                 return (ENXIO);
 609         }
 610 
 611         dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
 612 
 613         switch (cmd) {
 614         case DINFOIDENT:
 615                 /*
 616                  * This is called from di_init to verify that the driver
 617                  * opened is indeed devinfo. The purpose is to guard against
 618                  * sending ioctl to an unknown driver in case of an
 619                  * unresolved major number conflict during bfu.
 620                  */
 621                 *rvalp = DI_MAGIC;
 622                 return (0);
 623 
 624         case DINFOLODRV:
 625                 /*
 626                  * Hold an installed driver and return the result
 627                  */
 628                 if (DI_UNPRIVILEGED_NODE(m)) {
 629                         /*
 630                          * Only the fully enabled instances may issue
 631                          * DINFOLDDRV.
 632                          */
 633                         return (EACCES);
 634                 }
 635 
 636                 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 637                 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
 638                         kmem_free(drv_name, MAXNAMELEN);
 639                         return (EFAULT);
 640                 }
 641 
 642                 /*
 643                  * Some 3rd party driver's _init() walks the device tree,
 644                  * so we load the driver module before configuring driver.
 645                  */
 646                 i = ddi_name_to_major(drv_name);
 647                 if (ddi_hold_driver(i) == NULL) {
 648                         kmem_free(drv_name, MAXNAMELEN);
 649                         return (ENXIO);
 650                 }
 651 
 652                 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
 653 
 654                 /*
 655                  * i_ddi_load_drvconf() below will trigger a reprobe
 656                  * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
 657                  * needed here.
 658                  */
 659                 modunload_disable();
 660                 (void) i_ddi_load_drvconf(i);
 661                 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
 662                 kmem_free(drv_name, MAXNAMELEN);
 663                 ddi_rele_driver(i);
 664                 rv = i_ddi_devs_attached(i);
 665                 modunload_enable();
 666 
 667                 i_ddi_di_cache_invalidate();
 668 
 669                 return ((rv == DDI_SUCCESS)? 0 : ENXIO);
 670 
 671         case DINFOUSRLD:
 672                 /*
 673                  * The case for copying snapshot to userland
 674                  */
 675                 if (di_setstate(st, IOC_COPY) == -1)
 676                         return (EBUSY);
 677 
 678                 map_size = DI_ALL_PTR(st)->map_size;
 679                 if (map_size == 0) {
 680                         (void) di_setstate(st, IOC_DONE);
 681                         return (EFAULT);
 682                 }
 683 
 684                 /*
 685                  * copyout the snapshot
 686                  */
 687                 map_size = (map_size + PAGEOFFSET) & PAGEMASK;
 688 
 689                 /*
 690                  * Return the map size, so caller may do a sanity
 691                  * check against the return value of snapshot ioctl()
 692                  */
 693                 *rvalp = (int)map_size;
 694 
 695                 /*
 696                  * Copy one chunk at a time
 697                  */
 698                 off = 0;
 699                 dcp = st->memlist;
 700                 while (map_size) {
 701                         size = dcp->buf_size;
 702                         if (map_size <= size) {
 703                                 size = map_size;
 704                         }
 705 
 706                         if (ddi_copyout(di_mem_addr(st, off),
 707                             (void *)(arg + off), size, mode) != 0) {
 708                                 (void) di_setstate(st, IOC_DONE);
 709                                 return (EFAULT);
 710                         }
 711 
 712                         map_size -= size;
 713                         off += size;
 714                         dcp = dcp->next;
 715                 }
 716 
 717                 di_freemem(st);
 718                 (void) di_setstate(st, IOC_IDLE);
 719                 return (0);
 720 
 721         default:
 722                 if ((cmd & ~DIIOC_MASK) != DIIOC) {
 723                         /*
 724                          * Invalid ioctl command
 725                          */
 726                         return (ENOTTY);
 727                 }
 728                 /*
 729                  * take a snapshot
 730                  */
 731                 st->command = cmd & DIIOC_MASK;
 732                 /*FALLTHROUGH*/
 733         }
 734 
 735         /*
 736          * Obtain enough memory to hold header + rootpath.  We prevent kernel
 737          * memory exhaustion by freeing any previously allocated snapshot and
 738          * refusing the operation; otherwise we would be allowing ioctl(),
 739          * ioctl(), ioctl(), ..., panic.
 740          */
 741         if (di_setstate(st, IOC_SNAP) == -1)
 742                 return (EBUSY);
 743 
 744         /*
 745          * Initial memlist always holds di_all and the root_path - and
 746          * is at least a page and size.
 747          */
 748         size = sizeof (struct di_all) +
 749             sizeof (((struct dinfo_io *)(NULL))->root_path);
 750         if (size < PAGESIZE)
 751                 size = PAGESIZE;
 752         off = di_checkmem(st, 0, size);
 753         all = DI_ALL_PTR(st);
 754         off += sizeof (struct di_all);          /* real length of di_all */
 755 
 756         all->devcnt = devcnt;
 757         all->command = st->command;
 758         all->version = DI_SNAPSHOT_VERSION;
 759         all->top_vhci_devinfo = 0;           /* filled by build_vhci_list. */
 760 
 761         /*
 762          * Note the endianness in case we need to transport snapshot
 763          * over the network.
 764          */
 765 #if defined(_LITTLE_ENDIAN)
 766         all->endianness = DI_LITTLE_ENDIAN;
 767 #else
 768         all->endianness = DI_BIG_ENDIAN;
 769 #endif
 770 
 771         /* Copyin ioctl args, store in the snapshot. */
 772         if (copyinstr((void *)arg, all->req_path,
 773             sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
 774                 di_freemem(st);
 775                 (void) di_setstate(st, IOC_IDLE);
 776                 return (EFAULT);
 777         }
 778         (void) strcpy(all->root_path, all->req_path);
 779         off += size;                            /* real length of root_path */
 780 
 781         if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
 782                 di_freemem(st);
 783                 (void) di_setstate(st, IOC_IDLE);
 784                 return (EINVAL);
 785         }
 786 
 787         error = 0;
 788         if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
 789                 di_freemem(st);
 790                 (void) di_setstate(st, IOC_IDLE);
 791                 return (error);
 792         }
 793 
 794         /*
 795          * Only the fully enabled version may force load drivers or read
 796          * the parent private data from a driver.
 797          */
 798         if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
 799             DI_UNPRIVILEGED_NODE(m)) {
 800                 di_freemem(st);
 801                 (void) di_setstate(st, IOC_IDLE);
 802                 return (EACCES);
 803         }
 804 
 805         /* Do we need private data? */
 806         if (st->command & DINFOPRIVDATA) {
 807                 arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
 808 
 809 #ifdef _MULTI_DATAMODEL
 810                 switch (ddi_model_convert_from(mode & FMODELS)) {
 811                 case DDI_MODEL_ILP32: {
 812                         /*
 813                          * Cannot copy private data from 64-bit kernel
 814                          * to 32-bit app
 815                          */
 816                         di_freemem(st);
 817                         (void) di_setstate(st, IOC_IDLE);
 818                         return (EINVAL);
 819                 }
 820                 case DDI_MODEL_NONE:
 821                         if ((off = di_copyformat(off, st, arg, mode)) == 0) {
 822                                 di_freemem(st);
 823                                 (void) di_setstate(st, IOC_IDLE);
 824                                 return (EFAULT);
 825                         }
 826                         break;
 827                 }
 828 #else /* !_MULTI_DATAMODEL */
 829                 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
 830                         di_freemem(st);
 831                         (void) di_setstate(st, IOC_IDLE);
 832                         return (EFAULT);
 833                 }
 834 #endif /* _MULTI_DATAMODEL */
 835         }
 836 
 837         all->top_devinfo = DI_ALIGN(off);
 838 
 839         /*
 840          * For cache lookups we reallocate memory from scratch,
 841          * so the value of "all" is no longer valid.
 842          */
 843         all = NULL;
 844 
 845         if (st->command & DINFOCACHE) {
 846                 *rvalp = di_cache_lookup(st);
 847         } else if (snapshot_is_cacheable(st)) {
 848                 DI_CACHE_LOCK(di_cache);
 849                 *rvalp = di_cache_update(st);
 850                 DI_CACHE_UNLOCK(di_cache);
 851         } else
 852                 *rvalp = di_snapshot_and_clean(st);
 853 
 854         if (*rvalp) {
 855                 DI_ALL_PTR(st)->map_size = *rvalp;
 856                 (void) di_setstate(st, IOC_DONE);
 857         } else {
 858                 di_freemem(st);
 859                 (void) di_setstate(st, IOC_IDLE);
 860         }
 861 
 862         return (0);
 863 }
 864 
 865 /*
 866  * Get a chunk of memory >= size, for the snapshot
 867  */
 868 static void
 869 di_allocmem(struct di_state *st, size_t size)
 870 {
 871         struct di_mem   *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
 872 
 873         /*
 874          * Round up size to nearest power of 2. If it is less
 875          * than st->mem_size, set it to st->mem_size (i.e.,
 876          * the mem_size is doubled every time) to reduce the
 877          * number of memory allocations.
 878          */
 879         size_t tmp = 1;
 880         while (tmp < size) {
 881                 tmp <<= 1;
 882         }
 883         size = (tmp > st->mem_size) ? tmp : st->mem_size;
 884 
 885         mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
 886         mem->buf_size = size;
 887 
 888         dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
 889 
 890         if (st->mem_size == 0) {     /* first chunk */
 891                 st->memlist = mem;
 892         } else {
 893                 /*
 894                  * locate end of linked list and add a chunk at the end
 895                  */
 896                 struct di_mem *dcp = st->memlist;
 897                 while (dcp->next != NULL) {
 898                         dcp = dcp->next;
 899                 }
 900 
 901                 dcp->next = mem;
 902         }
 903 
 904         st->mem_size += size;
 905 }
 906 
 907 /*
 908  * Copy upto bufsiz bytes of the memlist to buf
 909  */
 910 static void
 911 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
 912 {
 913         struct di_mem   *dcp;
 914         size_t          copysz;
 915 
 916         if (st->mem_size == 0) {
 917                 ASSERT(st->memlist == NULL);
 918                 return;
 919         }
 920 
 921         copysz = 0;
 922         for (dcp = st->memlist; dcp; dcp = dcp->next) {
 923 
 924                 ASSERT(bufsiz > 0);
 925 
 926                 if (bufsiz <= dcp->buf_size)
 927                         copysz = bufsiz;
 928                 else
 929                         copysz = dcp->buf_size;
 930 
 931                 bcopy(dcp->buf, buf, copysz);
 932 
 933                 buf += copysz;
 934                 bufsiz -= copysz;
 935 
 936                 if (bufsiz == 0)
 937                         break;
 938         }
 939 }
 940 
 941 /*
 942  * Free all memory for the snapshot
 943  */
 944 static void
 945 di_freemem(struct di_state *st)
 946 {
 947         struct di_mem   *dcp, *tmp;
 948 
 949         dcmn_err2((CE_CONT, "di_freemem\n"));
 950 
 951         if (st->mem_size) {
 952                 dcp = st->memlist;
 953                 while (dcp) {   /* traverse the linked list */
 954                         tmp = dcp;
 955                         dcp = dcp->next;
 956                         ddi_umem_free(tmp->cook);
 957                         kmem_free(tmp, sizeof (struct di_mem));
 958                 }
 959                 st->mem_size = 0;
 960                 st->memlist = NULL;
 961         }
 962 
 963         ASSERT(st->mem_size == 0);
 964         ASSERT(st->memlist == NULL);
 965 }
 966 
 967 /*
 968  * Copies cached data to the di_state structure.
 969  * Returns:
 970  *      - size of data copied, on SUCCESS
 971  *      - 0 on failure
 972  */
 973 static int
 974 di_cache2mem(struct di_cache *cache, struct di_state *st)
 975 {
 976         caddr_t pa;
 977 
 978         ASSERT(st->mem_size == 0);
 979         ASSERT(st->memlist == NULL);
 980         ASSERT(!servicing_interrupt());
 981         ASSERT(DI_CACHE_LOCKED(*cache));
 982 
 983         if (cache->cache_size == 0) {
 984                 ASSERT(cache->cache_data == NULL);
 985                 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
 986                 return (0);
 987         }
 988 
 989         ASSERT(cache->cache_data);
 990 
 991         di_allocmem(st, cache->cache_size);
 992 
 993         pa = di_mem_addr(st, 0);
 994 
 995         ASSERT(pa);
 996 
 997         /*
 998          * Verify that di_allocmem() allocates contiguous memory,
 999          * so that it is safe to do straight bcopy()
1000          */
1001         ASSERT(st->memlist != NULL);
1002         ASSERT(st->memlist->next == NULL);
1003         bcopy(cache->cache_data, pa, cache->cache_size);
1004 
1005         return (cache->cache_size);
1006 }
1007 
1008 /*
1009  * Copies a snapshot from di_state to the cache
1010  * Returns:
1011  *      - 0 on failure
1012  *      - size of copied data on success
1013  */
1014 static size_t
1015 di_mem2cache(struct di_state *st, struct di_cache *cache)
1016 {
1017         size_t  map_size;
1018 
1019         ASSERT(cache->cache_size == 0);
1020         ASSERT(cache->cache_data == NULL);
1021         ASSERT(!servicing_interrupt());
1022         ASSERT(DI_CACHE_LOCKED(*cache));
1023 
1024         if (st->mem_size == 0) {
1025                 ASSERT(st->memlist == NULL);
1026                 CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
1027                 return (0);
1028         }
1029 
1030         ASSERT(st->memlist);
1031 
1032         /*
1033          * The size of the memory list may be much larger than the
1034          * size of valid data (map_size). Cache only the valid data
1035          */
1036         map_size = DI_ALL_PTR(st)->map_size;
1037         if (map_size == 0 || map_size < sizeof (struct di_all) ||
1038             map_size > st->mem_size) {
1039                 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
1040                 return (0);
1041         }
1042 
1043         cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
1044         cache->cache_size = map_size;
1045         di_copymem(st, cache->cache_data, cache->cache_size);
1046 
1047         return (map_size);
1048 }
1049 
1050 /*
1051  * Make sure there is at least "size" bytes memory left before
1052  * going on. Otherwise, start on a new chunk.
1053  */
1054 static di_off_t
1055 di_checkmem(struct di_state *st, di_off_t off, size_t size)
1056 {
1057         dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
1058             off, (int)size));
1059 
1060         /*
1061          * di_checkmem() shouldn't be called with a size of zero.
1062          * But in case it is, we want to make sure we return a valid
1063          * offset within the memlist and not an offset that points us
1064          * at the end of the memlist.
1065          */
1066         if (size == 0) {
1067                 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
1068                 size = 1;
1069         }
1070 
1071         off = DI_ALIGN(off);
1072         if ((st->mem_size - off) < size) {
1073                 off = st->mem_size;
1074                 di_allocmem(st, size);
1075         }
1076 
1077         /* verify that return value is aligned */
1078         ASSERT(off == DI_ALIGN(off));
1079         return (off);
1080 }
1081 
1082 /*
1083  * Copy the private data format from ioctl arg.
1084  * On success, the ending offset is returned. On error 0 is returned.
1085  */
1086 static di_off_t
1087 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
1088 {
1089         di_off_t                size;
1090         struct di_priv_data     *priv;
1091         struct di_all           *all = DI_ALL_PTR(st);
1092 
1093         dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
1094             off, (void *)arg, mode));
1095 
1096         /*
1097          * Copyin data and check version.
1098          * We only handle private data version 0.
1099          */
1100         priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
1101         if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
1102             mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
1103                 kmem_free(priv, sizeof (struct di_priv_data));
1104                 return (0);
1105         }
1106 
1107         /*
1108          * Save di_priv_data copied from userland in snapshot.
1109          */
1110         all->pd_version = priv->version;
1111         all->n_ppdata = priv->n_parent;
1112         all->n_dpdata = priv->n_driver;
1113 
1114         /*
1115          * copyin private data format, modify offset accordingly
1116          */
1117         if (all->n_ppdata) { /* parent private data format */
1118                 /*
1119                  * check memory
1120                  */
1121                 size = all->n_ppdata * sizeof (struct di_priv_format);
1122                 all->ppdata_format = off = di_checkmem(st, off, size);
1123                 if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
1124                     mode) != 0) {
1125                         kmem_free(priv, sizeof (struct di_priv_data));
1126                         return (0);
1127                 }
1128 
1129                 off += size;
1130         }
1131 
1132         if (all->n_dpdata) { /* driver private data format */
1133                 /*
1134                  * check memory
1135                  */
1136                 size = all->n_dpdata * sizeof (struct di_priv_format);
1137                 all->dpdata_format = off = di_checkmem(st, off, size);
1138                 if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
1139                     mode) != 0) {
1140                         kmem_free(priv, sizeof (struct di_priv_data));
1141                         return (0);
1142                 }
1143 
1144                 off += size;
1145         }
1146 
1147         kmem_free(priv, sizeof (struct di_priv_data));
1148         return (off);
1149 }
1150 
1151 /*
1152  * Return the real address based on the offset (off) within snapshot
1153  */
1154 static void *
1155 di_mem_addr(struct di_state *st, di_off_t off)
1156 {
1157         struct di_mem   *dcp = st->memlist;
1158 
1159         dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
1160             (void *)dcp, off));
1161 
1162         ASSERT(off < st->mem_size);
1163 
1164         while (off >= dcp->buf_size) {
1165                 off -= dcp->buf_size;
1166                 dcp = dcp->next;
1167         }
1168 
1169         dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
1170             off, (void *)(dcp->buf + off)));
1171 
1172         return (dcp->buf + off);
1173 }
1174 
1175 /*
1176  * Ideally we would use the whole key to derive the hash
1177  * value. However, the probability that two keys will
1178  * have the same dip (or pip) is very low, so
1179  * hashing by dip (or pip) pointer should suffice.
1180  */
1181 static uint_t
1182 di_hash_byptr(void *arg, mod_hash_key_t key)
1183 {
1184         struct di_key   *dik = key;
1185         size_t          rshift;
1186         void            *ptr;
1187 
1188         ASSERT(arg == NULL);
1189 
1190         switch (dik->k_type) {
1191         case DI_DKEY:
1192                 ptr = dik->k_u.dkey.dk_dip;
1193                 rshift = highbit(sizeof (struct dev_info));
1194                 break;
1195         case DI_PKEY:
1196                 ptr = dik->k_u.pkey.pk_pip;
1197                 rshift = highbit(sizeof (struct mdi_pathinfo));
1198                 break;
1199         default:
1200                 panic("devinfo: unknown key type");
1201                 /*NOTREACHED*/
1202         }
1203         return (mod_hash_byptr((void *)rshift, ptr));
1204 }
1205 
1206 static void
1207 di_key_dtor(mod_hash_key_t key)
1208 {
1209         char            *path_addr;
1210         struct di_key   *dik = key;
1211 
1212         switch (dik->k_type) {
1213         case DI_DKEY:
1214                 break;
1215         case DI_PKEY:
1216                 path_addr = dik->k_u.pkey.pk_path_addr;
1217                 if (path_addr)
1218                         kmem_free(path_addr, strlen(path_addr) + 1);
1219                 break;
1220         default:
1221                 panic("devinfo: unknown key type");
1222                 /*NOTREACHED*/
1223         }
1224 
1225         kmem_free(dik, sizeof (struct di_key));
1226 }
1227 
1228 static int
1229 di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
1230 {
1231         if (dk1->dk_dip !=  dk2->dk_dip)
1232                 return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);
1233 
1234         if (dk1->dk_major != DDI_MAJOR_T_NONE &&
1235             dk2->dk_major != DDI_MAJOR_T_NONE) {
1236                 if (dk1->dk_major !=  dk2->dk_major)
1237                         return (dk1->dk_major > dk2->dk_major ? 1 : -1);
1238 
1239                 if (dk1->dk_inst !=  dk2->dk_inst)
1240                         return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
1241         }
1242 
1243         if (dk1->dk_nodeid != dk2->dk_nodeid)
1244                 return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
1245 
1246         return (0);
1247 }
1248 
1249 static int
1250 di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
1251 {
1252         char    *p1, *p2;
1253         int     rv;
1254 
1255         if (pk1->pk_pip !=  pk2->pk_pip)
1256                 return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);
1257 
1258         p1 = pk1->pk_path_addr;
1259         p2 = pk2->pk_path_addr;
1260 
1261         p1 = p1 ? p1 : "";
1262         p2 = p2 ? p2 : "";
1263 
1264         rv = strcmp(p1, p2);
1265         if (rv)
1266                 return (rv > 0  ? 1 : -1);
1267 
1268         if (pk1->pk_client !=  pk2->pk_client)
1269                 return (pk1->pk_client > pk2->pk_client ? 1 : -1);
1270 
1271         if (pk1->pk_phci !=  pk2->pk_phci)
1272                 return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);
1273 
1274         return (0);
1275 }
1276 
1277 static int
1278 di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
1279 {
1280         struct di_key   *dik1, *dik2;
1281 
1282         dik1 = key1;
1283         dik2 = key2;
1284 
1285         if (dik1->k_type != dik2->k_type) {
1286                 panic("devinfo: mismatched keys");
1287                 /*NOTREACHED*/
1288         }
1289 
1290         switch (dik1->k_type) {
1291         case DI_DKEY:
1292                 return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
1293         case DI_PKEY:
1294                 return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
1295         default:
1296                 panic("devinfo: unknown key type");
1297                 /*NOTREACHED*/
1298         }
1299 }
1300 
1301 static void
1302 di_copy_aliases(struct di_state *st, alias_pair_t *apair, di_off_t *offp)
1303 {
1304         di_off_t                off;
1305         struct di_all           *all = DI_ALL_PTR(st);
1306         struct di_alias         *di_alias;
1307         di_off_t                curroff;
1308         dev_info_t              *currdip;
1309         size_t                  size;
1310 
1311         currdip = NULL;
1312         if (resolve_pathname(apair->pair_alias, &currdip, NULL, NULL) != 0) {
1313                 return;
1314         }
1315 
1316         if (di_dip_find(st, currdip, &curroff) != 0) {
1317                 ndi_rele_devi(currdip);
1318                 return;
1319         }
1320         ndi_rele_devi(currdip);
1321 
1322         off = *offp;
1323         size = sizeof (struct di_alias);
1324         size += strlen(apair->pair_alias) + 1;
1325         off = di_checkmem(st, off, size);
1326         di_alias = DI_ALIAS(di_mem_addr(st, off));
1327 
1328         di_alias->self = off;
1329         di_alias->next = all->aliases;
1330         all->aliases = off;
1331         (void) strcpy(di_alias->alias, apair->pair_alias);
1332         di_alias->curroff = curroff;
1333 
1334         off += size;
1335 
1336         *offp = off;
1337 }
1338 
1339 /*
1340  * This is the main function that takes a snapshot
1341  */
1342 static di_off_t
1343 di_snapshot(struct di_state *st)
1344 {
1345         di_off_t        off;
1346         struct di_all   *all;
1347         dev_info_t      *rootnode;
1348         char            buf[80];
1349         int             plen;
1350         char            *path;
1351         vnode_t         *vp;
1352         int             i;
1353 
1354         all = DI_ALL_PTR(st);
1355         dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));
1356 
1357         /*
1358          * Translate requested root path if an alias and snap-root != "/"
1359          */
1360         if (ddi_aliases_present == B_TRUE && strcmp(all->root_path, "/") != 0) {
1361                 /* If there is no redirected alias, use root_path as is */
1362                 rootnode = ddi_alias_redirect(all->root_path);
1363                 if (rootnode) {
1364                         (void) ddi_pathname(rootnode, all->root_path);
1365                         goto got_root;
1366                 }
1367         }
1368 
1369         /*
1370          * Verify path before entrusting it to e_ddi_hold_devi_by_path because
1371          * some platforms have OBP bugs where executing the NDI_PROMNAME code
1372          * path against an invalid path results in panic.  The lookupnameat
1373          * is done relative to rootdir without a leading '/' on "devices/"
1374          * to force the lookup to occur in the global zone.
1375          */
1376         plen = strlen("devices/") + strlen(all->root_path) + 1;
1377         path = kmem_alloc(plen, KM_SLEEP);
1378         (void) snprintf(path, plen, "devices/%s", all->root_path);
1379         if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
1380                 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1381                     all->root_path));
1382                 kmem_free(path, plen);
1383                 return (0);
1384         }
1385         kmem_free(path, plen);
1386         VN_RELE(vp);
1387 
1388         /*
1389          * Hold the devinfo node referred by the path.
1390          */
1391         rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
1392         if (rootnode == NULL) {
1393                 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1394                     all->root_path));
1395                 return (0);
1396         }
1397 
1398 got_root:
1399         (void) snprintf(buf, sizeof (buf),
1400             "devinfo registered dips (statep=%p)", (void *)st);
1401 
1402         st->reg_dip_hash = mod_hash_create_extended(buf, 64,
1403             di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1404             NULL, di_key_cmp, KM_SLEEP);
1405 
1406 
1407         (void) snprintf(buf, sizeof (buf),
1408             "devinfo registered pips (statep=%p)", (void *)st);
1409 
1410         st->reg_pip_hash = mod_hash_create_extended(buf, 64,
1411             di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1412             NULL, di_key_cmp, KM_SLEEP);
1413 
1414         if (DINFOHP & st->command) {
1415                 list_create(&st->hp_list, sizeof (i_hp_t),
1416                     offsetof(i_hp_t, hp_link));
1417         }
1418 
1419         /*
1420          * copy the device tree
1421          */
1422         off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);
1423 
1424         if (DINFOPATH & st->command) {
1425                 mdi_walk_vhcis(build_vhci_list, st);
1426         }
1427 
1428         if (DINFOHP & st->command) {
1429                 di_hotplug_children(st);
1430         }
1431 
1432         ddi_release_devi(rootnode);
1433 
1434         /*
1435          * copy the devnames array
1436          */
1437         all->devnames = off;
1438         off = di_copydevnm(&all->devnames, st);
1439 
1440 
1441         /* initialize the hash tables */
1442         st->lnode_count = 0;
1443         st->link_count = 0;
1444 
1445         if (DINFOLYR & st->command) {
1446                 off = di_getlink_data(off, st);
1447         }
1448 
1449         all->aliases = 0;
1450         if (ddi_aliases_present == B_FALSE)
1451                 goto done;
1452 
1453         for (i = 0; i < ddi_aliases.dali_num_pairs; i++) {
1454                 di_copy_aliases(st, &(ddi_aliases.dali_alias_pairs[i]), &off);
1455         }
1456 
1457 done:
1458         /*
1459          * Free up hash tables
1460          */
1461         mod_hash_destroy_hash(st->reg_dip_hash);
1462         mod_hash_destroy_hash(st->reg_pip_hash);
1463 
1464         /*
1465          * Record the timestamp now that we are done with snapshot.
1466          *
1467          * We compute the checksum later and then only if we cache
1468          * the snapshot, since checksumming adds some overhead.
1469          * The checksum is checked later if we read the cache file.
1470          * from disk.
1471          *
1472          * Set checksum field to 0 as CRC is calculated with that
1473          * field set to 0.
1474          */
1475         all->snapshot_time = ddi_get_time();
1476         all->cache_checksum = 0;
1477 
1478         ASSERT(all->snapshot_time != 0);
1479 
1480         return (off);
1481 }
1482 
1483 /*
1484  * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
1485  */
1486 static di_off_t
1487 di_snapshot_and_clean(struct di_state *st)
1488 {
1489         di_off_t        off;
1490 
1491         modunload_disable();
1492         off = di_snapshot(st);
1493         if (off != 0 && (st->command & DINFOCLEANUP)) {
1494                 ASSERT(DEVICES_FILES_CLEANABLE(st));
1495                 /*
1496                  * Cleanup /etc/devices files:
1497                  * In order to accurately account for the system configuration
1498                  * in /etc/devices files, the appropriate drivers must be
1499                  * fully configured before the cleanup starts.
1500                  * So enable modunload only after the cleanup.
1501                  */
1502                 i_ddi_clean_devices_files();
1503                 /*
1504                  * Remove backing store nodes for unused devices,
1505                  * which retain past permissions customizations
1506                  * and may be undesired for newly configured devices.
1507                  */
1508                 dev_devices_cleanup();
1509         }
1510         modunload_enable();
1511 
1512         return (off);
1513 }
1514 
1515 /*
1516  * construct vhci linkage in the snapshot.
1517  */
1518 static int
1519 build_vhci_list(dev_info_t *vh_devinfo, void *arg)
1520 {
1521         struct di_all   *all;
1522         struct di_node  *me;
1523         struct di_state *st;
1524         di_off_t        off;
1525         phci_walk_arg_t pwa;
1526 
1527         dcmn_err3((CE_CONT, "build_vhci list\n"));
1528 
1529         dcmn_err3((CE_CONT, "vhci node %s%d\n",
1530             ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));
1531 
1532         st = (struct di_state *)arg;
1533         if (di_dip_find(st, vh_devinfo, &off) != 0) {
1534                 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1535                 return (DDI_WALK_TERMINATE);
1536         }
1537 
1538         dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
1539             st->mem_size, off));
1540 
1541         all = DI_ALL_PTR(st);
1542         if (all->top_vhci_devinfo == 0) {
1543                 all->top_vhci_devinfo = off;
1544         } else {
1545                 me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));
1546 
1547                 while (me->next_vhci != 0) {
1548                         me = DI_NODE(di_mem_addr(st, me->next_vhci));
1549                 }
1550 
1551                 me->next_vhci = off;
1552         }
1553 
1554         pwa.off = off;
1555         pwa.st = st;
1556         mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);
1557 
1558         return (DDI_WALK_CONTINUE);
1559 }
1560 
1561 /*
1562  * construct phci linkage for the given vhci in the snapshot.
1563  */
1564 static int
1565 build_phci_list(dev_info_t *ph_devinfo, void *arg)
1566 {
1567         struct di_node  *vh_di_node;
1568         struct di_node  *me;
1569         phci_walk_arg_t *pwa;
1570         di_off_t        off;
1571 
1572         pwa = (phci_walk_arg_t *)arg;
1573 
1574         dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
1575             pwa->off));
1576 
1577         vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
1578         if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
1579                 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1580                 return (DDI_WALK_TERMINATE);
1581         }
1582 
1583         dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
1584             ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));
1585 
1586         if (vh_di_node->top_phci == 0) {
1587                 vh_di_node->top_phci = off;
1588                 return (DDI_WALK_CONTINUE);
1589         }
1590 
1591         me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));
1592 
1593         while (me->next_phci != 0) {
1594                 me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
1595         }
1596         me->next_phci = off;
1597 
1598         return (DDI_WALK_CONTINUE);
1599 }
1600 
1601 /*
1602  * Assumes all devinfo nodes in device tree have been snapshotted
1603  */
1604 static void
1605 snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
1606 {
1607         struct dev_info *node;
1608         struct di_node  *me;
1609         di_off_t        off;
1610 
1611         ASSERT(mutex_owned(&dnp->dn_lock));
1612 
1613         node = DEVI(dnp->dn_head);
1614         for (; node; node = node->devi_next) {
1615                 if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
1616                         continue;
1617 
1618                 ASSERT(off > 0);
1619                 me = DI_NODE(di_mem_addr(st, off));
1620                 ASSERT(me->next == 0 || me->next == -1);
1621                 /*
1622                  * Only nodes which were BOUND when they were
1623                  * snapshotted will be added to per-driver list.
1624                  */
1625                 if (me->next != -1)
1626                         continue;
1627 
1628                 *off_p = off;
1629                 off_p = &me->next;
1630         }
1631 
1632         *off_p = 0;
1633 }
1634 
1635 /*
1636  * Copy the devnames array, so we have a list of drivers in the snapshot.
1637  * Also makes it possible to locate the per-driver devinfo nodes.
1638  */
1639 static di_off_t
1640 di_copydevnm(di_off_t *off_p, struct di_state *st)
1641 {
1642         int             i;
1643         di_off_t        off;
1644         size_t          size;
1645         struct di_devnm *dnp;
1646 
1647         dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));
1648 
1649         /*
1650          * make sure there is some allocated memory
1651          */
1652         size = devcnt * sizeof (struct di_devnm);
1653         *off_p = off = di_checkmem(st, *off_p, size);
1654         dnp = DI_DEVNM(di_mem_addr(st, off));
1655         off += size;
1656 
1657         dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
1658             devcnt, off));
1659 
1660         for (i = 0; i < devcnt; i++) {
1661                 if (devnamesp[i].dn_name == NULL) {
1662                         continue;
1663                 }
1664 
1665                 /*
1666                  * dn_name is not freed during driver unload or removal.
1667                  *
1668                  * There is a race condition when make_devname() changes
1669                  * dn_name during our strcpy. This should be rare since
1670                  * only add_drv does this. At any rate, we never had a
1671                  * problem with ddi_name_to_major(), which should have
1672                  * the same problem.
1673                  */
1674                 dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
1675                     devnamesp[i].dn_name, devnamesp[i].dn_instance, off));
1676 
1677                 size = strlen(devnamesp[i].dn_name) + 1;
1678                 dnp[i].name = off = di_checkmem(st, off, size);
1679                 (void) strcpy((char *)di_mem_addr(st, off),
1680                     devnamesp[i].dn_name);
1681                 off += size;
1682 
1683                 mutex_enter(&devnamesp[i].dn_lock);
1684 
1685                 /*
1686                  * Snapshot per-driver node list
1687                  */
1688                 snap_driver_list(st, &devnamesp[i], &dnp[i].head);
1689 
1690                 /*
1691                  * This is not used by libdevinfo, leave it for now
1692                  */
1693                 dnp[i].flags = devnamesp[i].dn_flags;
1694                 dnp[i].instance = devnamesp[i].dn_instance;
1695 
1696                 /*
1697                  * get global properties
1698                  */
1699                 if ((DINFOPROP & st->command) &&
1700                     devnamesp[i].dn_global_prop_ptr) {
1701                         dnp[i].global_prop = off;
1702                         off = di_getprop(DI_PROP_GLB_LIST,
1703                             &devnamesp[i].dn_global_prop_ptr->prop_list,
1704                             &dnp[i].global_prop, st, NULL);
1705                 }
1706 
1707                 /*
1708                  * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
1709                  */
1710                 if (CB_DRV_INSTALLED(devopsp[i])) {
1711                         if (devopsp[i]->devo_cb_ops) {
1712                                 dnp[i].ops |= DI_CB_OPS;
1713                                 if (devopsp[i]->devo_cb_ops->cb_str)
1714                                         dnp[i].ops |= DI_STREAM_OPS;
1715                         }
1716                         if (NEXUS_DRV(devopsp[i])) {
1717                                 dnp[i].ops |= DI_BUS_OPS;
1718                         }
1719                 }
1720 
1721                 mutex_exit(&devnamesp[i].dn_lock);
1722         }
1723 
1724         dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));
1725 
1726         return (off);
1727 }
1728 
1729 /*
1730  * Copy the kernel devinfo tree. The tree and the devnames array forms
1731  * the entire snapshot (see also di_copydevnm).
1732  */
/*
 * Snapshot the devinfo tree rooted at 'root' into the memory described
 * by 'st', starting at offset *off_p.  Returns the offset just past the
 * last byte written.  Iterates with an explicit stack (di_stack) rather
 * than recursing, since the tree depth is unbounded relative to kernel
 * stack size.
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct dev_info *node;
	struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/*
	 * force attach drivers: only when the caller asked for the full
	 * subtree (DINFOSUBTREE) with forced configuration (DINFOFORCE),
	 * and the root itself is already attached.
	 */
	if (i_ddi_devi_attached((dev_info_t *)root) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.  The stack is non-empty
	 * on entry (root was just pushed), so 'off' is always assigned
	 * before the loop exits.
	 */
	while (!EMPTY_STACK(dsp)) {
		node = TOP_NODE(dsp);
		off = di_copynode(node, dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}
1776 
1777 /*
1778  * This is the core function, which copies all data associated with a single
1779  * node into the snapshot. The amount of information is determined by the
1780  * ioctl command.
1781  */
/*
 * Copy one devinfo node (the node on top of dsp's stack) into the
 * snapshot.  Which sections are included (minor data, paths, properties,
 * hotplug data, private data) is selected by st->command.  On return the
 * stack has been adjusted: a visible child or sibling may have been
 * pushed (subtree walk continues there), or the stack unrolled toward
 * the root.  Returns the next free snapshot offset.
 */
static di_off_t
di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	size_t		size;
	struct dev_info *n;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
	ASSERT((node != NULL) && (node == TOP_NODE(dsp)));

	/*
	 * check memory usage, and fix offsets accordingly.
	 * di_checkmem may grow the snapshot, so the returned offset is
	 * written back through the stack's offset pointer as well.
	 */
	size = sizeof (struct di_node);
	*(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
	me = DI_NODE(di_mem_addr(st, off));
	me->self = off;
	off += size;

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state
	 */
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->flags = node->devi_flags;
	me->node_state = node->devi_node_state;
	me->next_vhci = 0;		/* Filled up by build_vhci_list. */
	me->top_phci = 0;		/* Filled up by build_phci_list. */
	me->next_phci = 0;		/* Filled up by build_phci_list. */
	me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
	me->user_private_data = NULL;

	/*
	 * Get parent's offset in snapshot from the stack
	 * and store it in the current node (depth 1 is the root, which
	 * has no parent in the snapshot).
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

#ifdef	DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid_str) {
		ddi_devid_t	devid;

		/*
		 * The devid is now represented as a property. For
		 * compatibility with di_devid() interface in libdevinfo we
		 * must return it as a binary structure in the snapshot. When
		 * (if) di_devid() is removed from libdevinfo then the code
		 * related to DEVID_COMPATIBILITY can be removed.
		 */
		if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
		    DDI_SUCCESS) {
			size = ddi_devid_sizeof(devid);
			off = di_checkmem(st, off, size);
			me->devid = off;
			bcopy(devid, di_mem_addr(st, off), size);
			off += size;
			ddi_devid_free(devid);
		}
	}
#endif	/* DEVID_COMPATIBILITY */

	/* variable-length strings follow the fixed di_node record */
	if (node->devi_node_name) {
		size = strlen(node->devi_node_name) + 1;
		me->node_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += size;
	}

	/* length of 1 would be just the terminating NUL -- nothing to copy */
	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		size = node->devi_compat_length;
		me->compat_names = off = di_checkmem(st, off, size);
		me->compat_length = (int)size;
		bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
		off += size;
	}

	if (node->devi_addr) {
		size = strlen(node->devi_addr) + 1;
		me->address = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += size;
	}

	if (node->devi_binding_name) {
		size = strlen(node->devi_binding_name) + 1;
		me->bind_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += size;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH | DINFOHP) & st->command)) {
		goto priv_data;
	}

	/*
	 * LOCKING: We already have an active ndi_devi_enter to gather the
	 * minor data, and we will take devi_lock to gather properties as
	 * needed off di_getprop.
	 */
	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	ASSERT(DEVI_BUSY_OWNED(node));
	if (node->devi_minor) {		/* minor data */
		me->minor_data = off;
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_VHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_VHCI;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
		me->multipath_client = off;
		off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
		    me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_PHCI;
		me->multipath_phci = off;
		off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
		    me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}

property:
	if (!(DINFOPROP & st->command)) {
		goto hotplug_data;
	}

	if (node->devi_drv_prop_ptr) {	/* driver property list */
		me->drv_prop = off;
		off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr,
		    &me->drv_prop, st, node);
	}

	if (node->devi_sys_prop_ptr) {	/* system property list */
		me->sys_prop = off;
		off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr,
		    &me->sys_prop, st, node);
	}

	if (node->devi_hw_prop_ptr) {	/* hardware property list */
		me->hw_prop = off;
		off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr,
		    &me->hw_prop, st, node);
	}

	if (node->devi_global_prop_list == NULL) {
		me->glob_prop = (di_off_t)-1;	/* not global property */
	} else {
		/*
		 * Make copy of global property list if this devinfo refers
		 * global properties different from what's on the devnames
		 * array. It can happen if there has been a forced
		 * driver.conf update. See mod_drv(1M).
		 */
		ASSERT(me->drv_major != -1);
		if (node->devi_global_prop_list !=
		    devnamesp[me->drv_major].dn_global_prop_ptr) {
			me->glob_prop = off;
			off = di_getprop(DI_PROP_GLB_LIST,
			    &node->devi_global_prop_list->prop_list,
			    &me->glob_prop, st, node);
		}
	}

hotplug_data:
	if (!(DINFOHP & st->command)) {
		goto priv_data;
	}

	if (node->devi_hp_hdlp) {	/* hotplug data */
		me->hp_data = off;
		off = di_gethpdata(node->devi_hp_hdlp, &me->hp_data, st);
	}

priv_data:
	if (!(DINFOPRIVDATA & st->command)) {
		goto pm_info;
	}

	if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
		me->parent_data = off;
		off = di_getppdata(node, &me->parent_data, st);
	}

	if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
		me->driver_data = off;
		off = di_getdpdata(node, &me->driver_data, st);
	}

pm_info: /* NOT implemented */

/* NOTE(review): subtree/child/sibling labels below are documentary only;
 * no goto in this function targets them. */
subtree:
	/* keep the stack aligned */
	off = DI_ALIGN(off);

	if (!(DINFOSUBTREE & st->command)) {
		POP_STACK(dsp);
		return (off);
	}

child:
	/*
	 * If there is a visible child--push child onto stack.
	 * Hold the parent (me) busy while doing so.
	 */
	if ((n = node->devi_child) != NULL) {
		/* skip hidden nodes */
		while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
			n = n->devi_sibling;
		if (n) {
			me->child = off;
			PUSH_STACK(dsp, n, &me->child);
			return (me->child);
		}
	}

sibling:
	/*
	 * Done with any child nodes, unroll the stack till a visible
	 * sibling of a parent node is found or root node is reached.
	 */
	POP_STACK(dsp);
	while (!EMPTY_STACK(dsp)) {
		if ((n = node->devi_sibling) != NULL) {
			/* skip hidden nodes */
			while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
				n = n->devi_sibling;
			if (n) {
				me->sibling = DI_ALIGN(off);
				PUSH_STACK(dsp, n, &me->sibling);
				return (me->sibling);
			}
		}
		/* no sibling: move up to the parent and try again */
		node = TOP_NODE(dsp);
		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
		POP_STACK(dsp);
	}

	/*
	 * DONE with all nodes
	 */
	return (off);
}
2084 
2085 static i_lnode_t *
2086 i_lnode_alloc(int modid)
2087 {
2088         i_lnode_t       *i_lnode;
2089 
2090         i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);
2091 
2092         ASSERT(modid != -1);
2093         i_lnode->modid = modid;
2094 
2095         return (i_lnode);
2096 }
2097 
2098 static void
2099 i_lnode_free(i_lnode_t *i_lnode)
2100 {
2101         kmem_free(i_lnode, sizeof (i_lnode_t));
2102 }
2103 
2104 static void
2105 i_lnode_check_free(i_lnode_t *i_lnode)
2106 {
2107         /* This lnode and its dip must have been snapshotted */
2108         ASSERT(i_lnode->self > 0);
2109         ASSERT(i_lnode->di_node->self > 0);
2110 
2111         /* at least 1 link (in or out) must exist for this lnode */
2112         ASSERT(i_lnode->link_in || i_lnode->link_out);
2113 
2114         i_lnode_free(i_lnode);
2115 }
2116 
2117 static i_link_t *
2118 i_link_alloc(int spec_type)
2119 {
2120         i_link_t        *i_link;
2121 
2122         i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
2123         i_link->spec_type = spec_type;
2124 
2125         return (i_link);
2126 }
2127 
2128 static void
2129 i_link_check_free(i_link_t *i_link)
2130 {
2131         /* This link must have been snapshotted */
2132         ASSERT(i_link->self > 0);
2133 
2134         /* Both endpoint lnodes must exist for this link */
2135         ASSERT(i_link->src_lnode);
2136         ASSERT(i_link->tgt_lnode);
2137 
2138         kmem_free(i_link, sizeof (i_link_t));
2139 }
2140 
2141 /*ARGSUSED*/
2142 static uint_t
2143 i_lnode_hashfunc(void *arg, mod_hash_key_t key)
2144 {
2145         i_lnode_t       *i_lnode = (i_lnode_t *)key;
2146         struct di_node  *ptr;
2147         dev_t           dev;
2148 
2149         dev = i_lnode->devt;
2150         if (dev != DDI_DEV_T_NONE)
2151                 return (i_lnode->modid + getminor(dev) + getmajor(dev));
2152 
2153         ptr = i_lnode->di_node;
2154         ASSERT(ptr->self > 0);
2155         if (ptr) {
2156                 uintptr_t k = (uintptr_t)ptr;
2157                 k >>= (int)highbit(sizeof (struct di_node));
2158                 return ((uint_t)k);
2159         }
2160 
2161         return (i_lnode->modid);
2162 }
2163 
2164 static int
2165 i_lnode_cmp(void *arg1, void *arg2)
2166 {
2167         i_lnode_t       *i_lnode1 = (i_lnode_t *)arg1;
2168         i_lnode_t       *i_lnode2 = (i_lnode_t *)arg2;
2169 
2170         if (i_lnode1->modid != i_lnode2->modid) {
2171                 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
2172         }
2173 
2174         if (i_lnode1->di_node != i_lnode2->di_node)
2175                 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);
2176 
2177         if (i_lnode1->devt != i_lnode2->devt)
2178                 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);
2179 
2180         return (0);
2181 }
2182 
2183 /*
2184  * An lnode represents a {dip, dev_t} tuple. A link represents a
2185  * {src_lnode, tgt_lnode, spec_type} tuple.
2186  * The following callback assumes that LDI framework ref-counts the
2187  * src_dip and tgt_dip while invoking this callback.
2188  */
/*
 * LDI usage-walker callback: record one layered-driver usage edge as a
 * pair of i_lnodes (source and target) plus one i_link between them.
 * Lnodes are deduplicated through st->lnode_hash; duplicate links are
 * detected by scanning the source lnode's outbound list.  Always returns
 * LDI_USAGE_CONTINUE so the walk covers every usage record.
 */
static int
di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
{
	struct di_state	*st = (struct di_state *)arg;
	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode;
	i_link_t	**i_link_next, *i_link;
	di_off_t	soff, toff;
	mod_hash_val_t	nodep = NULL;
	int		res;

	/*
	 * if the source or target of this device usage information doesn't
	 * correspond to a device node then we don't report it via
	 * libdevinfo so return.
	 */
	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
		return (LDI_USAGE_CONTINUE);

	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));

	/*
	 * Skip the ldi_usage if either src or tgt dip is not in the
	 * snapshot. This saves us from pruning bad lnodes/links later.
	 */
	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
		return (LDI_USAGE_CONTINUE);
	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
		return (LDI_USAGE_CONTINUE);

	ASSERT(soff > 0);
	ASSERT(toff > 0);

	/*
	 * allocate an i_lnode and add it to the lnode hash
	 * if it is not already present. For this particular
	 * link the lnode is a source, but it may
	 * participate as tgt or src in any number of layered
	 * operations - so it may already be in the hash.
	 */
	i_lnode = i_lnode_alloc(ldi_usage->src_modid);
	i_lnode->di_node = DI_NODE(di_mem_addr(st, soff));
	i_lnode->devt = ldi_usage->src_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		src_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		src_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate a tgt i_lnode and add it to the lnode hash
	 * (same dedup dance as the source lnode above)
	 */
	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
	i_lnode->di_node = DI_NODE(di_mem_addr(st, toff));
	i_lnode->devt = ldi_usage->tgt_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		tgt_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		tgt_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate a i_link
	 */
	i_link = i_link_alloc(ldi_usage->tgt_spec_type);
	i_link->src_lnode = src_lnode;
	i_link->tgt_lnode = tgt_lnode;

	/*
	 * add this link onto the src i_lnodes outbound i_link list;
	 * walking the list doubles as duplicate detection.
	 */
	i_link_next = &(src_lnode->link_out);
	while (*i_link_next != NULL) {
		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
		    (i_link->spec_type == (*i_link_next)->spec_type)) {
			/* this link already exists */
			kmem_free(i_link, sizeof (i_link_t));
			return (LDI_USAGE_CONTINUE);
		}
		i_link_next = &((*i_link_next)->src_link_next);
	}
	*i_link_next = i_link;

	/*
	 * add this link onto the tgt i_lnodes inbound i_link list
	 */
	i_link_next = &(tgt_lnode->link_in);
	while (*i_link_next != NULL) {
		ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
		i_link_next = &((*i_link_next)->tgt_link_next);
	}
	*i_link_next = i_link;

	/*
	 * add this i_link to the link hash
	 */
	res = mod_hash_insert(st->link_hash, i_link, i_link);
	ASSERT(res == 0);
	st->link_count++;

	return (LDI_USAGE_CONTINUE);
}
2313 
/*
 * Walk state shared by i_lnode_walker() and i_link_walker() below.
 * The lnode hash is walked first, then the link hash; the counters
 * track progress and the *_off fields are the snapshot base offsets
 * of the di_lnode and di_link arrays respectively.
 */
struct i_layer_data {
	struct di_state	*st;		/* snapshot being built */
	int		lnode_count;	/* lnodes snapshotted so far */
	int		link_count;	/* links snapshotted so far */
	di_off_t	lnode_off;	/* base offset of di_lnode array */
	di_off_t	link_off;	/* base offset of di_link array */
};
2321 
/*ARGSUSED*/
/*
 * Link-hash walker: write one i_link into the snapshot's di_link array
 * and stitch it into four singly-linked lists (src lnode's link_out,
 * tgt lnode's link_in, src di_node's src_links, tgt di_node's
 * tgt_links).  Must run after i_lnode_walker has assigned lnode
 * snapshot offsets.
 */
static uint_t
i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_link_t		*i_link  = (i_link_t *)key;
	struct i_layer_data	*data = arg;
	struct di_link		*me;
	struct di_lnode		*melnode;
	struct di_node		*medinode;

	ASSERT(i_link->self == 0);

	/* assign this link's slot in the preallocated di_link array */
	i_link->self = data->link_off +
	    (data->link_count * sizeof (struct di_link));
	data->link_count++;

	ASSERT(data->link_off > 0 && data->link_count > 0);
	ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
	ASSERT(data->link_count <= data->st->link_count);

	/* fill in fields for the di_link snapshot */
	me = DI_LINK(di_mem_addr(data->st, i_link->self));
	me->self = i_link->self;
	me->spec_type = i_link->spec_type;

	/*
	 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
	 * are created during the LDI table walk. Since we are
	 * walking the link hash, the lnode hash has already been
	 * walked and the lnodes have been snapshotted. Save lnode
	 * offsets.
	 */
	me->src_lnode = i_link->src_lnode->self;
	me->tgt_lnode = i_link->tgt_lnode->self;

	/*
	 * Save this link's offset in the src_lnode snapshot's link_out
	 * field (prepend to the list).
	 */
	melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode));
	me->src_link_next = melnode->link_out;
	melnode->link_out = me->self;

	/*
	 * Put this link on the tgt_lnode's link_in field
	 */
	melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode));
	me->tgt_link_next = melnode->link_in;
	melnode->link_in = me->self;

	/*
	 * An i_lnode_t is only created if the corresponding dip exists
	 * in the snapshot. A pointer to the di_node is saved in the
	 * i_lnode_t when it is allocated. For this link, get the di_node
	 * for the source lnode. Then put the link on the di_node's list
	 * of src links
	 */
	medinode = i_link->src_lnode->di_node;
	me->src_node_next = medinode->src_links;
	medinode->src_links = me->self;

	/*
	 * Put this link on the tgt_links list of the target
	 * dip.
	 */
	medinode = i_link->tgt_lnode->di_node;
	me->tgt_node_next = medinode->tgt_links;
	medinode->tgt_links = me->self;

	return (MH_WALK_CONTINUE);
}
2393 
/*ARGSUSED*/
/*
 * Lnode-hash walker: write one i_lnode into the snapshot's di_lnode
 * array, record its dev_t (split into major/minor) and the offset of
 * its di_node, and thread it onto that di_node's lnode list.  Runs
 * before i_link_walker, which relies on the offsets assigned here.
 */
static uint_t
i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_lnode_t		*i_lnode = (i_lnode_t *)key;
	struct i_layer_data	*data = arg;
	struct di_lnode		*me;
	struct di_node		*medinode;

	ASSERT(i_lnode->self == 0);

	/* assign this lnode's slot in the preallocated di_lnode array */
	i_lnode->self = data->lnode_off +
	    (data->lnode_count * sizeof (struct di_lnode));
	data->lnode_count++;

	ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
	ASSERT(data->link_count == 0); /* links not done yet */
	ASSERT(data->lnode_count <= data->st->lnode_count);

	/* fill in fields for the di_lnode snapshot */
	me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
	me->self = i_lnode->self;

	if (i_lnode->devt == DDI_DEV_T_NONE) {
		me->dev_major = DDI_MAJOR_T_NONE;
		/*
		 * NOTE(review): DDI_MAJOR_T_NONE is also used as the
		 * "no minor" sentinel here -- presumably intentional;
		 * confirm against libdevinfo consumers before changing.
		 */
		me->dev_minor = DDI_MAJOR_T_NONE;
	} else {
		me->dev_major = getmajor(i_lnode->devt);
		me->dev_minor = getminor(i_lnode->devt);
	}

	/*
	 * The dip corresponding to this lnode must exist in
	 * the snapshot or we wouldn't have created the i_lnode_t
	 * during LDI walk. Save the offset of the dip.
	 */
	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
	me->node = i_lnode->di_node->self;

	/*
	 * There must be at least one link in or out of this lnode
	 * or we wouldn't have created it. These fields will be set
	 * during the link hash walk.
	 */
	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));

	/*
	 * set the offset of the devinfo node associated with this
	 * lnode. Also update the node_next next pointer.  this pointer
	 * is set if there are multiple lnodes associated with the same
	 * devinfo node.  (could occure when multiple minor nodes
	 * are open for one device, etc.)
	 */
	medinode = i_lnode->di_node;
	me->node_next = medinode->lnodes;
	medinode->lnodes = me->self;

	return (MH_WALK_CONTINUE);
}
2453 
/*
 * Gather layered-driver (LDI) linkage data into the snapshot starting
 * at 'off'.  Builds temporary lnode/link hashes via di_ldi_callback,
 * reserves contiguous snapshot space for the di_lnode and di_link
 * arrays, then walks both hashes to serialize them.  The hashes are
 * destroyed before return (their destructors sanity-check each entry).
 * Returns the next free snapshot offset.
 */
static di_off_t
di_getlink_data(di_off_t off, struct di_state *st)
{
	struct i_layer_data	data = {0};
	size_t			size;

	dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off));

	/* extended hash: i_lnode keys compared field-wise, not by pointer */
	st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
	    mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
	    i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);

	st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
	    (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));

	/* get driver layering information */
	(void) ldi_usage_walker(st, di_ldi_callback);

	/* check if there is any link data to include in the snapshot */
	if (st->lnode_count == 0) {
		ASSERT(st->link_count == 0);
		goto out;
	}

	ASSERT(st->link_count != 0);

	/* get a pointer to snapshot memory for all the di_lnodes */
	size = sizeof (struct di_lnode) * st->lnode_count;
	data.lnode_off = off = di_checkmem(st, off, size);
	off += size;

	/* get a pointer to snapshot memory for all the di_links */
	size = sizeof (struct di_link) * st->link_count;
	data.link_off = off = di_checkmem(st, off, size);
	off += size;

	data.lnode_count = data.link_count = 0;
	data.st = st;

	/*
	 * We have lnodes and links that will go into the
	 * snapshot, so let's walk the respective hashes
	 * and snapshot them. The various linkages are
	 * also set up during the walk.  Lnodes must be walked
	 * first: the link walker needs their snapshot offsets.
	 */
	mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
	ASSERT(data.lnode_count == st->lnode_count);

	mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
	ASSERT(data.link_count == st->link_count);

out:
	/* free up the i_lnodes and i_links used to create the snapshot */
	mod_hash_destroy_hash(st->lnode_hash);
	mod_hash_destroy_hash(st->link_hash);
	st->lnode_count = 0;
	st->link_count = 0;

	return (off);
}
2514 
2515 
2516 /*
2517  * Copy all minor data nodes attached to a devinfo node into the snapshot.
2518  * It is called from di_copynode with active ndi_devi_enter to protect
2519  * the list of minor nodes.
2520  */
static di_off_t
di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
	struct di_state *st)
{
	di_off_t	off;
	struct di_minor *me;
	size_t		size;

	dcmn_err2((CE_CONT, "di_getmdata:\n"));

	/*
	 * check memory first; the (possibly relocated) offset of the
	 * first minor record is returned to the caller via *off_p.
	 */
	off = di_checkmem(st, *off_p, sizeof (struct di_minor));
	*off_p = off;

	/* caller guarantees mnode != NULL, so do/while is safe */
	do {
		me = DI_MINOR(di_mem_addr(st, off));
		me->self = off;
		me->type = mnode->type;
		me->node = node;	/* offset of the owning di_node */
		me->user_private_data = NULL;

		off += sizeof (struct di_minor);

		/*
		 * Split dev_t to major/minor, so it works for
		 * both ILP32 and LP64 model
		 */
		me->dev_major = getmajor(mnode->ddm_dev);
		me->dev_minor = getminor(mnode->ddm_dev);
		me->spec_type = mnode->ddm_spec_type;

		if (mnode->ddm_name) {
			size = strlen(mnode->ddm_name) + 1;
			me->name = off = di_checkmem(st, off, size);
			(void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
			off += size;
		}

		if (mnode->ddm_node_type) {
			size = strlen(mnode->ddm_node_type) + 1;
			me->node_type = off = di_checkmem(st, off, size);
			(void) strcpy(di_mem_addr(st, off),
			    mnode->ddm_node_type);
			off += size;
		}

		/*
		 * Reserve space for the next record and link to it now;
		 * if this turns out to be the last minor node, me->next
		 * is cleared below.
		 */
		off = di_checkmem(st, off, sizeof (struct di_minor));
		me->next = off;
		mnode = mnode->next;
	} while (mnode);

	/* terminate the list: the final record has no successor */
	me->next = 0;

	return (off);
}
2578 
2579 /*
2580  * di_register_dip(), di_find_dip(): The dip must be protected
2581  * from deallocation when using these routines - this can either
2582  * be a reference count, a busy hold or a per-driver lock.
2583  */
2584 
2585 static void
2586 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2587 {
2588         struct dev_info *node = DEVI(dip);
2589         struct di_key   *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2590         struct di_dkey  *dk;
2591 
2592         ASSERT(dip);
2593         ASSERT(off > 0);
2594 
2595         key->k_type = DI_DKEY;
2596         dk = &(key->k_u.dkey);
2597 
2598         dk->dk_dip = dip;
2599         dk->dk_major = node->devi_major;
2600         dk->dk_inst = node->devi_instance;
2601         dk->dk_nodeid = node->devi_nodeid;
2602 
2603         if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2604             (mod_hash_val_t)(uintptr_t)off) != 0) {
2605                 panic(
2606                     "duplicate devinfo (%p) registered during device "
2607                     "tree walk", (void *)dip);
2608         }
2609 }
2610 
2611 
2612 static int
2613 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2614 {
2615         /*
2616          * uintptr_t must be used because it matches the size of void *;
2617          * mod_hash expects clients to place results into pointer-size
2618          * containers; since di_off_t is always a 32-bit offset, alignment
2619          * would otherwise be broken on 64-bit kernels.
2620          */
2621         uintptr_t       offset;
2622         struct          di_key key = {0};
2623         struct          di_dkey *dk;
2624 
2625         ASSERT(st->reg_dip_hash);
2626         ASSERT(dip);
2627         ASSERT(off_p);
2628 
2629 
2630         key.k_type = DI_DKEY;
2631         dk = &(key.k_u.dkey);
2632 
2633         dk->dk_dip = dip;
2634         dk->dk_major = DEVI(dip)->devi_major;
2635         dk->dk_inst = DEVI(dip)->devi_instance;
2636         dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2637 
2638         if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2639             (mod_hash_val_t *)&offset) == 0) {
2640                 *off_p = (di_off_t)offset;
2641                 return (0);
2642         } else {
2643                 return (-1);
2644         }
2645 }
2646 
2647 /*
2648  * di_register_pip(), di_find_pip(): The pip must be protected from deallocation
2649  * when using these routines. The caller must do this by protecting the
2650  * client(or phci)<->pip linkage while traversing the list and then holding the
2651  * pip when it is found in the list.
2652  */
2653 
2654 static void
2655 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2656 {
2657         struct di_key   *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2658         char            *path_addr;
2659         struct di_pkey  *pk;
2660 
2661         ASSERT(pip);
2662         ASSERT(off > 0);
2663 
2664         key->k_type = DI_PKEY;
2665         pk = &(key->k_u.pkey);
2666 
2667         pk->pk_pip = pip;
2668         path_addr = mdi_pi_get_addr(pip);
2669         if (path_addr)
2670                 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2671         pk->pk_client = mdi_pi_get_client(pip);
2672         pk->pk_phci = mdi_pi_get_phci(pip);
2673 
2674         if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2675             (mod_hash_val_t)(uintptr_t)off) != 0) {
2676                 panic(
2677                     "duplicate pathinfo (%p) registered during device "
2678                     "tree walk", (void *)pip);
2679         }
2680 }
2681 
2682 /*
2683  * As with di_register_pip, the caller must hold or lock the pip
2684  */
2685 static int
2686 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2687 {
2688         /*
2689          * uintptr_t must be used because it matches the size of void *;
2690          * mod_hash expects clients to place results into pointer-size
2691          * containers; since di_off_t is always a 32-bit offset, alignment
2692          * would otherwise be broken on 64-bit kernels.
2693          */
2694         uintptr_t       offset;
2695         struct di_key   key = {0};
2696         struct di_pkey  *pk;
2697 
2698         ASSERT(st->reg_pip_hash);
2699         ASSERT(off_p);
2700 
2701         if (pip == NULL) {
2702                 *off_p = 0;
2703                 return (0);
2704         }
2705 
2706         key.k_type = DI_PKEY;
2707         pk = &(key.k_u.pkey);
2708 
2709         pk->pk_pip = pip;
2710         pk->pk_path_addr = mdi_pi_get_addr(pip);
2711         pk->pk_client = mdi_pi_get_client(pip);
2712         pk->pk_phci = mdi_pi_get_phci(pip);
2713 
2714         if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2715             (mod_hash_val_t *)&offset) == 0) {
2716                 *off_p = (di_off_t)offset;
2717                 return (0);
2718         } else {
2719                 return (-1);
2720         }
2721 }
2722 
2723 static di_path_state_t
2724 path_state_convert(mdi_pathinfo_state_t st)
2725 {
2726         switch (st) {
2727         case MDI_PATHINFO_STATE_ONLINE:
2728                 return (DI_PATH_STATE_ONLINE);
2729         case MDI_PATHINFO_STATE_STANDBY:
2730                 return (DI_PATH_STATE_STANDBY);
2731         case MDI_PATHINFO_STATE_OFFLINE:
2732                 return (DI_PATH_STATE_OFFLINE);
2733         case MDI_PATHINFO_STATE_FAULT:
2734                 return (DI_PATH_STATE_FAULT);
2735         default:
2736                 return (DI_PATH_STATE_UNKNOWN);
2737         }
2738 }
2739 
2740 static uint_t
2741 path_flags_convert(uint_t pi_path_flags)
2742 {
2743         uint_t  di_path_flags = 0;
2744 
2745         /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */
2746 
2747         if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
2748                 di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED;
2749 
2750         return (di_path_flags);
2751 }
2752 
2753 
/*
 * Copy the nvlist properties of a pathinfo node into the snapshot as a
 * linked list of di_path_prop records.
 *
 * pip:   pathinfo node whose properties are copied (caller holds the pip).
 * off_p: on entry points at the path's prop-list head field; set to the
 *        offset of the first di_path_prop, or 0 when there are none.
 * st:    snapshot state; offsets index its memory list via di_mem_addr().
 *
 * Returns the next free offset in the snapshot.
 */
static di_off_t
di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p,
    struct di_state *st)
{
	nvpair_t		*prop = NULL;
	struct di_path_prop	*me;
	int			off;
	size_t			size;
	char			*str;
	uchar_t			*buf;
	uint_t			nelems;

	off = *off_p;
	/* No properties at all: terminate the list head and bail early */
	if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
		*off_p = 0;
		return (off);
	}

	/* reserve space for the first record and link the list head to it */
	off = di_checkmem(st, off, sizeof (struct di_path_prop));
	*off_p = off;

	while (prop = mdi_pi_get_next_prop(pip, prop)) {
		me = DI_PATHPROP(di_mem_addr(st, off));
		me->self = off;
		off += sizeof (struct di_path_prop);

		/*
		 * property name
		 */
		size = strlen(nvpair_name(prop)) + 1;
		me->prop_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
		off += size;

		/* property value, mapped onto the legacy DDI prop types */
		switch (nvpair_type(prop)) {
		case DATA_TYPE_BYTE:
		case DATA_TYPE_INT16:
		case DATA_TYPE_UINT16:
		case DATA_TYPE_INT32:
		case DATA_TYPE_UINT32:
			/* all sub-int32 scalars are widened to int32 */
			me->prop_type = DDI_PROP_TYPE_INT;
			size = sizeof (int32_t);
			off = di_checkmem(st, off, size);
			(void) nvpair_value_int32(prop,
			    (int32_t *)di_mem_addr(st, off));
			break;

		case DATA_TYPE_INT64:
		case DATA_TYPE_UINT64:
			me->prop_type = DDI_PROP_TYPE_INT64;
			size = sizeof (int64_t);
			off = di_checkmem(st, off, size);
			(void) nvpair_value_int64(prop,
			    (int64_t *)di_mem_addr(st, off));
			break;

		case DATA_TYPE_STRING:
			me->prop_type = DDI_PROP_TYPE_STRING;
			(void) nvpair_value_string(prop, &str);
			size = strlen(str) + 1;
			off = di_checkmem(st, off, size);
			(void) strcpy(di_mem_addr(st, off), str);
			break;

		case DATA_TYPE_BYTE_ARRAY:
		case DATA_TYPE_INT16_ARRAY:
		case DATA_TYPE_UINT16_ARRAY:
		case DATA_TYPE_INT32_ARRAY:
		case DATA_TYPE_UINT32_ARRAY:
		case DATA_TYPE_INT64_ARRAY:
		case DATA_TYPE_UINT64_ARRAY:
			/*
			 * NOTE(review): all array types are fetched via
			 * nvpair_value_byte_array() and copied as raw
			 * bytes — presumably the consumer re-interprets
			 * them from prop_type/prop_len; confirm.
			 */
			me->prop_type = DDI_PROP_TYPE_BYTE;
			(void) nvpair_value_byte_array(prop, &buf, &nelems);
			size = nelems;
			if (nelems != 0) {
				off = di_checkmem(st, off, size);
				bcopy(buf, di_mem_addr(st, off), size);
			}
			break;

		default:	/* Unknown or unhandled type; skip it */
			size = 0;
			break;
		}

		/*
		 * NOTE(review): when size == 0, prop_data is left as-is;
		 * di_checkmem() does not guarantee zero-filled memory, so
		 * consumers should rely on prop_len — TODO confirm.
		 */
		if (size > 0) {
			me->prop_data = off;
		}

		me->prop_len = (int)size;
		off += size;

		/* reserve space for a possible next record and link to it */
		off = di_checkmem(st, off, sizeof (struct di_path_prop));
		me->prop_next = off;
	}

	/* loop ran at least once (checked above); terminate the list */
	me->prop_next = 0;
	return (off);
}
2853 
2854 
2855 static void
2856 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2857     int get_client)
2858 {
2859         if (get_client) {
2860                 ASSERT(me->path_client == 0);
2861                 me->path_client = noff;
2862                 ASSERT(me->path_c_link == 0);
2863                 *off_pp = &me->path_c_link;
2864                 me->path_snap_state &=
2865                     ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2866         } else {
2867                 ASSERT(me->path_phci == 0);
2868                 me->path_phci = noff;
2869                 ASSERT(me->path_p_link == 0);
2870                 *off_pp = &me->path_p_link;
2871                 me->path_snap_state &=
2872                     ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2873         }
2874 }
2875 
2876 /*
2877  * off_p: pointer to the linkage field. This links pips along the client|phci
2878  *         linkage list.
2879  * noff  : Offset for the endpoint dip snapshot.
2880  */
/*
 * Snapshot all (non-hidden) pathinfo nodes reachable from 'dip',
 * walking either its client-side or phci-side pip list depending on
 * get_client.  Each pip is marshalled at most once across the whole
 * snapshot; a pip seen a second time only gets its remaining endpoint
 * and linkage filled in.  Returns the next free snapshot offset.
 */
static di_off_t
di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff,
    struct di_state *st, int get_client)
{
	di_off_t	off;
	mdi_pathinfo_t	*pip;
	struct di_path	*me;
	mdi_pathinfo_t	*(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
	size_t		size;

	dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));

	/*
	 * The naming of the following mdi_xyz() is unfortunately
	 * non-intuitive. mdi_get_next_phci_path() follows the
	 * client_link i.e. the list of pip's belonging to the
	 * given client dip.
	 */
	if (get_client)
		next_pip = &mdi_get_next_phci_path;
	else
		next_pip = &mdi_get_next_client_path;

	off = *off_p;

	pip = NULL;
	while (pip = (*next_pip)(dip, pip)) {
		di_off_t stored_offset;

		dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));

		/* hold the pip stable while we marshal it */
		mdi_pi_lock(pip);

		/* We don't represent hidden paths in the snapshot */
		if (mdi_pi_ishidden(pip)) {
			dcmn_err((CE_WARN, "hidden, skip"));
			mdi_pi_unlock(pip);
			continue;
		}

		if (di_pip_find(st, pip, &stored_offset) != -1) {
			/*
			 * We've already seen this pathinfo node so we need to
			 * take care not to snap it again; However, one endpoint
			 * and linkage will be set here. The other endpoint
			 * and linkage has already been set when the pip was
			 * first snapshotted i.e. when the other endpoint dip
			 * was snapshotted.
			 */
			me = DI_PATH(di_mem_addr(st, stored_offset));
			*off_p = stored_offset;

			di_path_one_endpoint(me, noff, &off_p, get_client);

			/*
			 * The other endpoint and linkage were set when this
			 * pip was snapshotted. So we are done with both
			 * endpoints and linkages.
			 */
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));

			mdi_pi_unlock(pip);
			continue;
		}

		/*
		 * Now that we need to snapshot this pip, check memory
		 */
		size = sizeof (struct di_path);
		*off_p = off = di_checkmem(st, off, size);
		me = DI_PATH(di_mem_addr(st, off));
		me->self = off;
		off += size;

		/* both endpoints/linkages are outstanding until filled in */
		me->path_snap_state =
		    DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
		me->path_snap_state |=
		    DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;

		/*
		 * Zero out fields as di_checkmem() doesn't guarantee
		 * zero-filled memory
		 */
		me->path_client = me->path_phci = 0;
		me->path_c_link = me->path_p_link = 0;

		/* fill in the endpoint we are walking from */
		di_path_one_endpoint(me, noff, &off_p, get_client);

		/*
		 * Note the existence of this pathinfo
		 */
		di_register_pip(st, pip, me->self);

		me->path_state = path_state_convert(mdi_pi_get_state(pip));
		me->path_flags = path_flags_convert(mdi_pi_get_flags(pip));

		me->path_instance = mdi_pi_get_path_instance(pip);

		/*
		 * Get intermediate addressing info.
		 */
		size = strlen(mdi_pi_get_addr(pip)) + 1;
		me->path_addr = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
		off += size;

		/*
		 * Get path properties if props are to be included in the
		 * snapshot
		 */
		if (DINFOPROP & st->command) {
			me->path_prop = off;
			off = di_path_getprop(pip, &me->path_prop, st);
		} else {
			me->path_prop = 0;
		}

		mdi_pi_unlock(pip);
	}

	/* terminate whichever linkage field off_p ended up pointing at */
	*off_p = 0;
	return (off);
}
3007 
3008 /*
3009  * Return driver prop_op entry point for the specified devinfo node.
3010  *
3011  * To return a non-NULL value:
3012  * - driver must be attached and held:
3013  *   If driver is not attached we ignore the driver property list.
3014  *   No one should rely on such properties.
3015  * - driver "cb_prop_op != ddi_prop_op":
3016  *   If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
3017  *   XXX or parent's bus_prop_op != ddi_bus_prop_op
3018  */
3019 static int
3020 (*di_getprop_prop_op(struct dev_info *dip))
3021         (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
3022 {
3023         struct dev_ops  *ops;
3024 
3025         /* If driver is not attached we ignore the driver property list. */
3026         if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
3027                 return (NULL);
3028 
3029         /*
3030          * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
3031          * or even NULL.
3032          */
3033         ops = dip->devi_ops;
3034         if (ops && ops->devo_cb_ops &&
3035             (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
3036             (ops->devo_cb_ops->cb_prop_op != nodev) &&
3037             (ops->devo_cb_ops->cb_prop_op != nulldev) &&
3038             (ops->devo_cb_ops->cb_prop_op != NULL))
3039                 return (ops->devo_cb_ops->cb_prop_op);
3040         return (NULL);
3041 }
3042 
/*
 * Append one property to the snapshot's property list.
 *
 * list:      which list this entry belongs to (DI_PROP_*_LIST).
 * dyn:       non-zero for a dynamic property, whose only value source is
 *            a successful prop_op(9E) call.
 * st:        snapshot state.
 * dip:       devinfo node owning the property (must be non-NULL when
 *            prop_op is supplied).
 * prop_op:   driver's prop_op(9E) entry, or NULL to use aval/alen as-is.
 * name:      property name (may be NULL).
 * devt:      dev_t the property is associated with.
 * aflags:    property flags, including the DDI_PROP_TYPE_* bits.
 * alen/aval: length/value captured from the static property list.
 * off:       current allocation offset in the snapshot.
 * off_pp:    in/out: points at the linkage field to patch with this
 *            entry's offset; updated to this entry's 'next' field.
 *
 * Returns the next free allocation offset.
 */
static di_off_t
di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
    int (*prop_op)(),
    char *name, dev_t devt, int aflags, int alen, caddr_t aval,
    di_off_t off, di_off_t **off_pp)
{
	int		need_free = 0;
	dev_t		pdevt;
	int		pflags;
	int		rv;
	caddr_t		val;
	int		len;
	size_t		size;
	struct di_prop	*pp;

	/* If we have prop_op function, ask driver for latest value */
	if (prop_op) {
		ASSERT(dip);

		/* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
		pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;

		/*
		 * We have type information in flags, but are invoking an
		 * old non-typed prop_op(9E) interface. Since not all types are
		 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
		 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
		 * expand type bits beyond DDI_PROP_TYPE_ANY.  This allows us
		 * to use the legacy prop_op(9E) interface to obtain updates
		 * non-DDI_PROP_TYPE_ANY dynamic properties.
		 */
		pflags = aflags & ~DDI_PROP_TYPE_MASK;
		pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
		    DDI_PROP_CONSUMER_TYPED;

		/*
		 * Hold and exit across prop_op(9E) to avoid lock order
		 * issues between
		 *   [ndi_devi_enter() ..prop_op(9E).. driver-lock]
		 * .vs.
		 *   [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F)..
		 *   ndi_devi_enter()]
		 * ordering.
		 */
		ndi_hold_devi((dev_info_t *)dip);
		ndi_devi_exit((dev_info_t *)dip, dip->devi_circular);
		rv = (*prop_op)(pdevt, (dev_info_t *)dip,
		    PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len);
		ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular);
		ndi_rele_devi((dev_info_t *)dip);

		if (rv == DDI_PROP_SUCCESS) {
			need_free = 1;		/* dynamic prop obtained */
		} else if (dyn) {
			/*
			 * A dynamic property must succeed prop_op(9E) to show
			 * up in the snapshot - that is the only source of its
			 * value.
			 */
			return (off);		/* dynamic prop not supported */
		} else {
			/*
			 * In case calling the driver caused an update via
			 * prop_op(9E) of a non-dynamic property (code leading
			 * to ddi_prop_change), we defer picking up val and
			 * len information until after prop_op(9E) to ensure
			 * that we snapshot the latest value.
			 */
			val = aval;
			len = alen;

		}
	} else {
		/* no prop_op: snapshot the static value as captured */
		val = aval;
		len = alen;
	}

	dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
	    list, name ? name : "NULL", len, (void *)val));

	/* allocate the di_prop record and link the caller's field to it */
	size = sizeof (struct di_prop);
	**off_pp = off = di_checkmem(st, off, size);
	pp = DI_PROP(di_mem_addr(st, off));
	pp->self = off;
	off += size;

	pp->dev_major = getmajor(devt);
	pp->dev_minor = getminor(devt);
	pp->prop_flags = aflags;
	pp->prop_list = list;

	/* property name */
	if (name) {
		size = strlen(name) + 1;
		pp->prop_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), name);
		off += size;
	} else {
		/* -1 marks "no name" (0 is a valid-looking offset) */
		pp->prop_name = -1;
	}

	pp->prop_len = len;
	if (val == NULL) {
		pp->prop_data = -1;
	} else if (len != 0) {
		size = len;
		pp->prop_data = off = di_checkmem(st, off, size);
		bcopy(val, di_mem_addr(st, off), size);
		off += size;
	}

	pp->next = 0;			/* assume tail for now */
	*off_pp = &pp->next;		/* return pointer to our next */

	if (need_free)			/* free PROP_LEN_AND_VAL_ALLOC alloc */
		kmem_free(val, len);
	return (off);
}
3161 
3162 
3163 /*
3164  * Copy a list of properties attached to a devinfo node. Called from
3165  * di_copynode with active ndi_devi_enter. The major number is passed in case
3166  * we need to call driver's prop_op entry. The value of list indicates
3167  * which list we are copying. Possible values are:
3168  * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
3169  */
static di_off_t
di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p,
    struct di_state *st, struct dev_info *dip)
{
	struct ddi_prop		*prop;
	int			(*prop_op)();
	int			off;
	struct ddi_minor_data	*mn;
	i_ddi_prop_dyn_t	*dp;
	/* private copy of one ddi_prop entry, built under devi_lock */
	struct plist {
		struct plist	*pl_next;
		char		*pl_name;
		int		pl_flags;
		dev_t		pl_dev;
		int		pl_len;
		caddr_t		pl_val;
	}			*pl, *pl0, **plp;

	ASSERT(st != NULL);

	off = *off_p;
	*off_p = 0;
	dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n",
	    list, (void *)*pprop));

	/* get pointer to driver's prop_op(9E) implementation if DRV_LIST */
	prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL;

	/*
	 * Form private list of properties, holding devi_lock for properties
	 * that hang off the dip.
	 */
	if (dip)
		mutex_enter(&(dip->devi_lock));
	for (pl0 = NULL, plp = &pl0, prop = *pprop;
	    prop; plp = &pl->pl_next, prop = prop->prop_next) {
		pl = kmem_alloc(sizeof (*pl), KM_SLEEP);
		*plp = pl;
		pl->pl_next = NULL;
		if (prop->prop_name)
			pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP);
		else
			pl->pl_name = NULL;
		pl->pl_flags = prop->prop_flags;
		pl->pl_dev = prop->prop_dev;
		if (prop->prop_len) {
			pl->pl_len = prop->prop_len;
			pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP);
			bcopy(prop->prop_val, pl->pl_val, pl->pl_len);
		} else {
			pl->pl_len = 0;
			pl->pl_val = NULL;
		}
	}
	if (dip)
		mutex_exit(&(dip->devi_lock));

	/*
	 * Now that we have dropped devi_lock, perform a second-pass to
	 * add properties to the snapshot.  We do this as a second pass
	 * because we may need to call prop_op(9E) and we can't hold
	 * devi_lock across that call.
	 */
	for (pl = pl0; pl; pl = pl0) {
		pl0 = pl->pl_next;
		off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name,
		    pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val,
		    off, &off_p);
		/* free the private copy now that it has been snapshotted */
		if (pl->pl_val)
			kmem_free(pl->pl_val, pl->pl_len);
		if (pl->pl_name)
			kmem_free(pl->pl_name, strlen(pl->pl_name) + 1);
		kmem_free(pl, sizeof (*pl));
	}

	/*
	 * If there is no prop_op or dynamic property support has been
	 * disabled, we are done.
	 */
	if ((prop_op == NULL) || (di_prop_dyn == 0)) {
		*off_p = 0;
		return (off);
	}

	/* Add dynamic driver properties to snapshot */
	for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip);
	    dp && dp->dp_name; dp++) {
		if (dp->dp_spec_type) {
			/* if spec_type, property of matching minor */
			ASSERT(DEVI_BUSY_OWNED(dip));
			for (mn = dip->devi_minor; mn; mn = mn->next) {
				if (mn->ddm_spec_type != dp->dp_spec_type)
					continue;
				off = di_getprop_add(list, 1, st, dip, prop_op,
				    dp->dp_name, mn->ddm_dev, dp->dp_type,
				    0, NULL, off, &off_p);
			}
		} else {
			/* property of devinfo node */
			off = di_getprop_add(list, 1, st, dip, prop_op,
			    dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
			    0, NULL, off, &off_p);
		}
	}

	/* Add dynamic parent properties to snapshot */
	for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip);
	    dp && dp->dp_name; dp++) {
		if (dp->dp_spec_type) {
			/* if spec_type, property of matching minor */
			ASSERT(DEVI_BUSY_OWNED(dip));
			for (mn = dip->devi_minor; mn; mn = mn->next) {
				if (mn->ddm_spec_type != dp->dp_spec_type)
					continue;
				off = di_getprop_add(list, 1, st, dip, prop_op,
				    dp->dp_name, mn->ddm_dev, dp->dp_type,
				    0, NULL, off, &off_p);
			}
		} else {
			/* property of devinfo node */
			off = di_getprop_add(list, 1, st, dip, prop_op,
			    dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
			    0, NULL, off, &off_p);
		}
	}

	/* terminate whichever 'next' field off_p ended up pointing at */
	*off_p = 0;
	return (off);
}
3299 
3300 /*
3301  * find private data format attached to a dip
3302  * parent = 1 to match driver name of parent dip (for parent private data)
3303  *      0 to match driver name of current dip (for driver private data)
3304  */
3305 #define DI_MATCH_DRIVER 0
3306 #define DI_MATCH_PARENT 1
3307 
3308 struct di_priv_format *
3309 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3310 {
3311         int                     i, count, len;
3312         char                    *drv_name;
3313         major_t                 major;
3314         struct di_all           *all;
3315         struct di_priv_format   *form;
3316 
3317         dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3318             node->devi_node_name, match));
3319 
3320         if (match == DI_MATCH_PARENT) {
3321                 node = DEVI(node->devi_parent);
3322         }
3323 
3324         if (node == NULL) {
3325                 return (NULL);
3326         }
3327 
3328         major = node->devi_major;
3329         if (major == (major_t)(-1)) {
3330                 return (NULL);
3331         }
3332 
3333         /*
3334          * Match the driver name.
3335          */
3336         drv_name = ddi_major_to_name(major);
3337         if ((drv_name == NULL) || *drv_name == '\0') {
3338                 return (NULL);
3339         }
3340 
3341         /* Now get the di_priv_format array */
3342         all = DI_ALL_PTR(st);
3343         if (match == DI_MATCH_PARENT) {
3344                 count = all->n_ppdata;
3345                 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3346         } else {
3347                 count = all->n_dpdata;
3348                 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3349         }
3350 
3351         len = strlen(drv_name);
3352         for (i = 0; i < count; i++) {
3353                 char *tmp;
3354 
3355                 tmp = form[i].drv_name;
3356                 while (tmp && (*tmp != '\0')) {
3357                         if (strncmp(drv_name, tmp, len) == 0) {
3358                                 return (&form[i]);
3359                         }
3360                         /*
3361                          * Move to next driver name, skipping a white space
3362                          */
3363                         if (tmp = strchr(tmp, ' ')) {
3364                                 tmp++;
3365                         }
3366                 }
3367         }
3368 
3369         return (NULL);
3370 }
3371 
3372 /*
3373  * The following functions copy data as specified by the format passed in.
3374  * To prevent invalid format from panicing the system, we call on_fault().
3375  * A return value of 0 indicates an error. Otherwise, the total offset
3376  * is returned.
3377  */
3378 #define DI_MAX_PRIVDATA (PAGESIZE >> 1)   /* max private data size */
3379 
static di_off_t
di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
    void *data, di_off_t *off_p, struct di_state *st)
{
        caddr_t         pa;
        void            *ptr;
        int             i, size, repeat;
        di_off_t        off, off0, *tmp;
        char            *path;
        label_t         ljb;

        dcmn_err2((CE_CONT, "di_getprvdata:\n"));

        /*
         * check memory availability. Private data size is
         * limited to DI_MAX_PRIVDATA.
         */
        off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
        *off_p = off;

        /* Reject formats describing an empty or oversized structure */
        if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
                goto failure;
        }

        /*
         * on_fault() returns 0 when first called; if a fault is taken
         * while dereferencing the (possibly bogus) private data below,
         * control resumes here with a non-zero return and we take the
         * failure path instead of panicking.
         */
        if (!on_fault(&ljb)) {
                /* copy the struct */
                bcopy(data, di_mem_addr(st, off), pdp->bytes);
                off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */

                /* dereferencing pointers */
                for (i = 0; i < MAX_PTR_IN_PRV; i++) {

                        if (pdp->ptr[i].size == 0) {
                                goto success;   /* no more ptrs */
                        }

                        /*
                         * first, get the pointer content
                         */
                        if ((pdp->ptr[i].offset < 0) ||
                            (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
                                goto failure;   /* wrong offset */

                        pa = di_mem_addr(st, off + pdp->ptr[i].offset);

                        /*
                         * save a tmp ptr so the pointer slot in the
                         * snapshot copy can be patched with a snapshot
                         * offset once the pointed-to data is copied
                         */
                        tmp = (di_off_t *)(intptr_t)pa;

                        /* get pointer value, if NULL continue */
                        ptr = *((void **) (intptr_t)pa);
                        if (ptr == NULL) {
                                continue;
                        }

                        /*
                         * next, find the repeat count (array dimension)
                         */
                        repeat = pdp->ptr[i].len_offset;

                        /*
                         * Per the code below: a non-negative len_offset
                         * is the offset of an int member of the source
                         * structure holding the array length (variable
                         * sized array), while a negative len_offset
                         * encodes a fixed array dimension as its
                         * absolute value.
                         *
                         * NOTE(review): this bound check runs before the
                         * sign of repeat is examined; if pdp->bytes is an
                         * unsigned type, a negative repeat is converted
                         * for the comparison and takes the failure path —
                         * confirm fixed-size (negative) encodings behave
                         * as intended.
                         */
                        if (repeat > pdp->bytes - sizeof (int)) {
                                goto failure;   /* wrong offset */
                        }

                        if (repeat >= 0) {
                                /* variable size: read length from struct */
                                repeat = *((int *)
                                    (intptr_t)((caddr_t)data + repeat));
                        } else {
                                /* fixed size: dimension is -len_offset */
                                repeat = -repeat;
                        }

                        /*
                         * next, get the size of the object to be copied
                         */
                        size = pdp->ptr[i].size * repeat;

                        /*
                         * Arbitrarily limit the total size of object to be
                         * copied (1 byte up to the space remaining below
                         * DI_MAX_PRIVDATA).
                         */
                        if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
                                goto failure;   /* wrong size or too big */
                        }

                        /*
                         * Now copy the data; the pointer slot in the
                         * snapshot is replaced by the relative offset of
                         * the copied object.
                         */
                        *tmp = off0;
                        bcopy(ptr, di_mem_addr(st, off + off0), size);
                        off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */
                }
        } else {
                goto failure;
        }

success:
        /*
         * success if reached here
         */
        no_fault();
        return (off + off0);
        /*NOTREACHED*/

failure:
        /*
         * fault occurred (or the format/data failed validation); log the
         * device path and flag the private data as bad in the snapshot
         */
        no_fault();
        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
        cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
            ddi_pathname((dev_info_t *)node, path), data);
        kmem_free(path, MAXPATHLEN);
        *off_p = -1;    /* set private data to indicate error */

        return (off);
}
3503 
3504 /*
3505  * get parent private data; on error, returns original offset
3506  */
3507 static di_off_t
3508 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3509 {
3510         int                     off;
3511         struct di_priv_format   *ppdp;
3512 
3513         dcmn_err2((CE_CONT, "di_getppdata:\n"));
3514 
3515         /* find the parent data format */
3516         if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3517                 off = *off_p;
3518                 *off_p = 0;     /* set parent data to none */
3519                 return (off);
3520         }
3521 
3522         return (di_getprvdata(ppdp, node,
3523             ddi_get_parent_data((dev_info_t *)node), off_p, st));
3524 }
3525 
3526 /*
 * get driver private data; on error, returns original offset
3528  */
static di_off_t
di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
        int                     off;
        struct di_priv_format   *dpdp;

        dcmn_err2((CE_CONT, "di_getdpdata:"));

        /* find the driver data format (not parent data; see DI_MATCH_DRIVER) */
        if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
                off = *off_p;
                *off_p = 0;     /* set driver data to none */
                return (off);
        }

        /* copy the driver private data into the snapshot */
        return (di_getprvdata(dpdp, node,
            ddi_get_driver_private((dev_info_t *)node), off_p, st));
}
3547 
3548 /*
3549  * Copy hotplug data associated with a devinfo node into the snapshot.
3550  */
static di_off_t
di_gethpdata(ddi_hp_cn_handle_t *hp_hdl, di_off_t *off_p,
    struct di_state *st)
{
        struct i_hp     *hp;
        struct di_hp    *me;
        size_t          size;
        di_off_t        off;

        dcmn_err2((CE_CONT, "di_gethpdata:\n"));

        /*
         * check memory first
         */
        off = di_checkmem(st, *off_p, sizeof (struct di_hp));
        *off_p = off;

        /*
         * Walk the chain of hotplug connection handles, emitting one
         * di_hp record (plus its name/type strings) per handle.
         *
         * NOTE(review): `me` is written after later di_checkmem() calls;
         * this assumes di_checkmem() grows the snapshot without
         * relocating already-allocated memory — confirm against its
         * implementation.
         */
        do {
                me = DI_HP(di_mem_addr(st, off));
                me->self = off;
                me->hp_name = 0;
                me->hp_connection = (int)hp_hdl->cn_info.cn_num;
                me->hp_depends_on = (int)hp_hdl->cn_info.cn_num_dpd_on;
                /* refresh cn_state before recording it in the snapshot */
                (void) ddihp_cn_getstate(hp_hdl);
                me->hp_state = (int)hp_hdl->cn_info.cn_state;
                me->hp_type = (int)hp_hdl->cn_info.cn_type;
                me->hp_type_str = 0;
                me->hp_last_change = (uint32_t)hp_hdl->cn_info.cn_last_change;
                me->hp_child = 0;

                /*
                 * Child links are resolved later by di_hotplug_children().
                 * Store a reference to this di_hp_t in the list used later
                 * by di_hotplug_children().
                 */
                hp = kmem_zalloc(sizeof (i_hp_t), KM_SLEEP);
                hp->hp_off = off;
                hp->hp_child = hp_hdl->cn_info.cn_child;
                list_insert_tail(&st->hp_list, hp);

                off += sizeof (struct di_hp);

                /* Add name of this di_hp_t to the snapshot */
                if (hp_hdl->cn_info.cn_name) {
                        size = strlen(hp_hdl->cn_info.cn_name) + 1;
                        me->hp_name = off = di_checkmem(st, off, size);
                        (void) strcpy(di_mem_addr(st, off),
                            hp_hdl->cn_info.cn_name);
                        off += size;
                }

                /* Add type description of this di_hp_t to the snapshot */
                if (hp_hdl->cn_info.cn_type_str) {
                        size = strlen(hp_hdl->cn_info.cn_type_str) + 1;
                        me->hp_type_str = off = di_checkmem(st, off, size);
                        (void) strcpy(di_mem_addr(st, off),
                            hp_hdl->cn_info.cn_type_str);
                        off += size;
                }

                /*
                 * Set link to next in the chain of di_hp_t nodes,
                 * or terminate the chain when processing the last node.
                 */
                if (hp_hdl->next != NULL) {
                        off = di_checkmem(st, off, sizeof (struct di_hp));
                        me->next = off;
                } else {
                        me->next = 0;
                }

                /* Update pointer to next in the chain */
                hp_hdl = hp_hdl->next;

        } while (hp_hdl);

        /* Return the offset just past the last record written */
        return (off);
}
3629 
3630 /*
3631  * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3632  * This function encapsulates the state machine:
3633  *
3634  *      -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3635  *      |               SNAPSHOT                USRLD    |
3636  *      --------------------------------------------------
3637  *
3638  * Returns 0 on success and -1 on failure
3639  */
3640 static int
3641 di_setstate(struct di_state *st, int new_state)
3642 {
3643         int     ret = 0;
3644 
3645         mutex_enter(&di_lock);
3646         switch (new_state) {
3647         case IOC_IDLE:
3648         case IOC_DONE:
3649                 break;
3650         case IOC_SNAP:
3651                 if (st->di_iocstate != IOC_IDLE)
3652                         ret = -1;
3653                 break;
3654         case IOC_COPY:
3655                 if (st->di_iocstate != IOC_DONE)
3656                         ret = -1;
3657                 break;
3658         default:
3659                 ret = -1;
3660         }
3661 
3662         if (ret == 0)
3663                 st->di_iocstate = new_state;
3664         else
3665                 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3666                     st->di_iocstate, new_state);
3667         mutex_exit(&di_lock);
3668         return (ret);
3669 }
3670 
3671 /*
3672  * We cannot assume the presence of the entire
3673  * snapshot in this routine. All we are guaranteed
3674  * is the di_all struct + 1 byte (for root_path)
3675  */
static int
header_plus_one_ok(struct di_all *all)
{
        /*
         * Refuse to read old versions
         */
        if (all->version != DI_SNAPSHOT_VERSION) {
                CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
                return (0);
        }

        /* Magic number distinguishes a cache image from a raw snapshot */
        if (all->cache_magic != DI_CACHE_MAGIC) {
                CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
                return (0);
        }

        /* A zero timestamp means the snapshot was never completed */
        if (all->snapshot_time == 0) {
                CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
                return (0);
        }

        /* The root devinfo node offset must be set */
        if (all->top_devinfo == 0) {
                CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
                return (0);
        }

        /* map_size must cover at least the header plus root_path's "/" */
        if (all->map_size < sizeof (*all) + 1) {
                CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
                return (0);
        }

        /* Cached snapshots are always rooted at "/" */
        if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
                CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
                    all->root_path[0], all->root_path[1]));
                return (0);
        }

        /*
         * We can't check checksum here as we just have the header
         */

        return (1);
}
3719 
3720 static int
3721 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
3722 {
3723         rlim64_t        rlimit;
3724         ssize_t         resid;
3725         int             error = 0;
3726 
3727 
3728         rlimit = RLIM64_INFINITY;
3729 
3730         while (len) {
3731                 resid = 0;
3732                 error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
3733                     UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
3734 
3735                 if (error || resid < 0) {
3736                         error = error ? error : EIO;
3737                         CACHE_DEBUG((DI_ERR, "write error: %d", error));
3738                         break;
3739                 }
3740 
3741                 /*
3742                  * Check if we are making progress
3743                  */
3744                 if (resid >= len) {
3745                         error = ENOSPC;
3746                         break;
3747                 }
3748                 buf += len - resid;
3749                 off += len - resid;
3750                 len = resid;
3751         }
3752 
3753         return (error);
3754 }
3755 
/*
 * Write the in-core devinfo cache out to disk.  The data is first
 * written to a temp file (DI_CACHE_TEMP) which is then renamed over
 * the real cache file (DI_CACHE_FILE), so readers never observe a
 * partially written cache.  Any failure is logged and the write is
 * simply skipped; the existing on-disk cache file is not touched
 * until the final rename.
 */
static void
di_cache_write(struct di_cache *cache)
{
        struct di_all   *all;
        struct vnode    *vp;
        int             oflags;
        size_t          map_size;
        size_t          chunk;
        offset_t        off;
        int             error;
        char            *buf;

        ASSERT(DI_CACHE_LOCKED(*cache));
        ASSERT(!servicing_interrupt());

        if (cache->cache_size == 0) {
                ASSERT(cache->cache_data == NULL);
                CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
                return;
        }

        ASSERT(cache->cache_size > 0);
        ASSERT(cache->cache_data);

        /* Writing requires a mounted, writable root filesystem */
        if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
                CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
                return;
        }

        all = (struct di_all *)cache->cache_data;

        /* Sanity-check the header before persisting anything */
        if (!header_plus_one_ok(all)) {
                CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
                return;
        }

        ASSERT(strcmp(all->root_path, "/") == 0);

        /*
         * The cache_size is the total allocated memory for the cache.
         * The map_size is the actual size of valid data in the cache.
         * map_size may be smaller than cache_size but cannot exceed
         * cache_size.
         */
        if (all->map_size > cache->cache_size) {
                CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
                    " Skipping write", all->map_size, cache->cache_size));
                return;
        }

        /*
         * First unlink the temp file
         */
        error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
        if (error && error != ENOENT) {
                CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
                    DI_CACHE_TEMP, error));
        }

        if (error == EROFS) {
                CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
                return;
        }

        vp = NULL;
        oflags = (FCREAT|FWRITE);
        if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
            DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
                CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
                    DI_CACHE_TEMP, error));
                return;
        }

        ASSERT(vp);

        /*
         * Paranoid: Check if the file is on a read-only FS
         */
        if (vn_is_readonly(vp)) {
                CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
                goto fail;
        }

        /*
         * Note that we only write map_size bytes to disk - this saves
         * space as the actual cache size may be larger than size of
         * valid data in the cache.
         * Another advantage is that it makes verification of size
         * easier when the file is read later.
         */
        map_size = all->map_size;
        off = 0;
        buf = cache->cache_data;

        while (map_size) {
                ASSERT(map_size > 0);
                /*
                 * Write in chunks so that VM system
                 * is not overwhelmed
                 */
                if (map_size > di_chunk * PAGESIZE)
                        chunk = di_chunk * PAGESIZE;
                else
                        chunk = map_size;

                error = chunk_write(vp, off, buf, chunk);
                if (error) {
                        CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
                            off, error));
                        goto fail;
                }

                off += chunk;
                buf += chunk;
                map_size -= chunk;

                /* If low on memory, give pageout a chance to run */
                if (freemem < desfree)
                        delay(1);
        }

        /*
         * Now sync the file and close it
         */
        if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) {
                CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
        }

        if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) {
                CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
                VN_RELE(vp);
                return;
        }

        VN_RELE(vp);

        /*
         * Now do the rename
         */
        if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
                CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
                return;
        }

        CACHE_DEBUG((DI_INFO, "Cache write successful."));

        return;

fail:
        /*
         * Error path: close and release the temp file's vnode.  A
         * partially written temp file may remain; it is unlinked at
         * the start of the next write attempt.
         */
        (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL);
        VN_RELE(vp);
}
3908 
3909 
3910 /*
3911  * Since we could be called early in boot,
3912  * use kobj_read_file()
3913  */
static void
di_cache_read(struct di_cache *cache)
{
        struct _buf     *file;
        struct di_all   *all;
        int             n;
        size_t          map_size, sz, chunk;
        offset_t        off;
        caddr_t         buf;
        uint32_t        saved_crc, crc;

        ASSERT(modrootloaded);
        ASSERT(DI_CACHE_LOCKED(*cache));
        ASSERT(cache->cache_data == NULL);
        ASSERT(cache->cache_size == 0);
        ASSERT(!servicing_interrupt());

        /* kobj_open_file() returns (struct _buf *)-1, not NULL, on failure */
        file = kobj_open_file(DI_CACHE_FILE);
        if (file == (struct _buf *)-1) {
                CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
                    DI_CACHE_FILE, ENOENT));
                return;
        }

        /*
         * Read in the header+root_path first. The root_path must be "/"
         */
        all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
        n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);

        if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
                kmem_free(all, sizeof (*all) + 1);
                kobj_close_file(file);
                CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
                return;
        }

        map_size = all->map_size;

        /* Done with the temporary header buffer; re-read into the cache */
        kmem_free(all, sizeof (*all) + 1);

        ASSERT(map_size >= sizeof (*all) + 1);

        buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
        sz = map_size;
        off = 0;
        while (sz) {
                /* Don't overload VM with large reads */
                chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
                n = kobj_read_file(file, buf, chunk, off);
                if (n != chunk) {
                        CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
                            DI_CACHE_FILE, off));
                        goto fail;
                }
                off += chunk;
                buf += chunk;
                sz -= chunk;
        }

        ASSERT(off == map_size);

        /*
         * Read past expected EOF to verify size.
         */
        if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
                CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
                goto fail;
        }

        /* Re-validate the header now held in the cache buffer */
        all = (struct di_all *)di_cache.cache_data;
        if (!header_plus_one_ok(all)) {
                CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
                goto fail;
        }

        /*
         * Compute CRC with checksum field in the cache data set to 0,
         * mirroring how the checksum was computed at write time, then
         * restore the stored value before comparing.
         */
        saved_crc = all->cache_checksum;
        all->cache_checksum = 0;
        CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
        all->cache_checksum = saved_crc;

        if (crc != all->cache_checksum) {
                CACHE_DEBUG((DI_ERR,
                    "%s: checksum error: expected=0x%x actual=0x%x",
                    DI_CACHE_FILE, all->cache_checksum, crc));
                goto fail;
        }

        if (all->map_size != map_size) {
                CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
                goto fail;
        }

        kobj_close_file(file);

        di_cache.cache_size = map_size;

        return;

fail:
        /*
         * On any validation failure drop the partially read data and
         * leave the cache empty (cache_data NULL, cache_size 0) so
         * callers fall back to taking a fresh snapshot.
         */
        kmem_free(di_cache.cache_data, map_size);
        kobj_close_file(file);
        di_cache.cache_data = NULL;
        di_cache.cache_size = 0;
}
4022 
4023 
4024 /*
4025  * Checks if arguments are valid for using the cache.
4026  */
4027 static int
4028 cache_args_valid(struct di_state *st, int *error)
4029 {
4030         ASSERT(error);
4031         ASSERT(st->mem_size > 0);
4032         ASSERT(st->memlist != NULL);
4033 
4034         if (!modrootloaded || !i_ddi_io_initialized()) {
4035                 CACHE_DEBUG((DI_ERR,
4036                     "cache lookup failure: I/O subsystem not inited"));
4037                 *error = ENOTACTIVE;
4038                 return (0);
4039         }
4040 
4041         /*
4042          * No other flags allowed with DINFOCACHE
4043          */
4044         if (st->command != (DINFOCACHE & DIIOC_MASK)) {
4045                 CACHE_DEBUG((DI_ERR,
4046                     "cache lookup failure: bad flags: 0x%x",
4047                     st->command));
4048                 *error = EINVAL;
4049                 return (0);
4050         }
4051 
4052         if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4053                 CACHE_DEBUG((DI_ERR,
4054                     "cache lookup failure: bad root: %s",
4055                     DI_ALL_PTR(st)->root_path));
4056                 *error = EINVAL;
4057                 return (0);
4058         }
4059 
4060         CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));
4061 
4062         *error = 0;
4063 
4064         return (1);
4065 }
4066 
4067 static int
4068 snapshot_is_cacheable(struct di_state *st)
4069 {
4070         ASSERT(st->mem_size > 0);
4071         ASSERT(st->memlist != NULL);
4072 
4073         if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
4074             (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
4075                 CACHE_DEBUG((DI_INFO,
4076                     "not cacheable: incompatible flags: 0x%x",
4077                     st->command));
4078                 return (0);
4079         }
4080 
4081         if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4082                 CACHE_DEBUG((DI_INFO,
4083                     "not cacheable: incompatible root path: %s",
4084                     DI_ALL_PTR(st)->root_path));
4085                 return (0);
4086         }
4087 
4088         CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));
4089 
4090         return (1);
4091 }
4092 
/*
 * Serve a DINFOCACHE request from the devinfo cache.  If the cache is
 * valid, the cached snapshot (read from disk if not yet in core) is
 * copied into di_state; otherwise a fresh snapshot is taken and the
 * cache updated.  Returns the size of the valid snapshot data, or 0
 * on failure.
 */
static int
di_cache_lookup(struct di_state *st)
{
        size_t  rval;
        int     cache_valid;

        /*
         * NOTE: on non-DEBUG kernels this ASSERT (and the call inside
         * it) is compiled out; cache_valid is unconditionally assigned
         * below before first use.
         */
        ASSERT(cache_args_valid(st, &cache_valid));
        ASSERT(modrootloaded);

        DI_CACHE_LOCK(di_cache);

        /*
         * The following assignment determines the validity
         * of the cache as far as this snapshot is concerned.
         */
        cache_valid = di_cache.cache_valid;

        /* Valid but not yet in core: fault the cache in from disk */
        if (cache_valid && di_cache.cache_data == NULL) {
                di_cache_read(&di_cache);
                /* check for read or file error */
                if (di_cache.cache_data == NULL)
                        cache_valid = 0;
        }

        if (cache_valid) {
                /*
                 * Ok, the cache was valid as of this particular
                 * snapshot. Copy the cached snapshot. This is safe
                 * to do as the cache cannot be freed (we hold the
                 * cache lock). Free the memory allocated in di_state
                 * up until this point - we will simply copy everything
                 * in the cache.
                 */

                ASSERT(di_cache.cache_data != NULL);
                ASSERT(di_cache.cache_size > 0);

                di_freemem(st);

                rval = 0;
                if (di_cache2mem(&di_cache, st) > 0) {
                        /*
                         * map_size is size of valid data in the
                         * cached snapshot and may be less than
                         * size of the cache.
                         */
                        ASSERT(DI_ALL_PTR(st));
                        rval = DI_ALL_PTR(st)->map_size;

                        ASSERT(rval >= sizeof (struct di_all));
                        ASSERT(rval <= di_cache.cache_size);
                }
        } else {
                /*
                 * The cache isn't valid, we need to take a snapshot.
                 * Set the command flags appropriately
                 */
                ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
                st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
                rval = di_cache_update(st);
                /* restore the original DINFOCACHE command */
                st->command = (DINFOCACHE & DIIOC_MASK);
        }

        DI_CACHE_UNLOCK(di_cache);

        /*
         * For cached snapshots, the devinfo driver always returns
         * a snapshot rooted at "/".
         */
        ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);

        return ((int)rval);
}
4166 
4167 /*
4168  * This is a forced update of the cache  - the previous state of the cache
4169  * may be:
4170  *      - unpopulated
4171  *      - populated and invalid
4172  *      - populated and valid
4173  */
static int
di_cache_update(struct di_state *st)
{
        int             rval;
        uint32_t        crc;
        struct di_all   *all;

        ASSERT(DI_CACHE_LOCKED(di_cache));
        ASSERT(snapshot_is_cacheable(st));

        /*
         * Free the in-core cache and the on-disk file (if they exist)
         */
        i_ddi_di_cache_free(&di_cache);

        /*
         * Set valid flag before taking the snapshot,
         * so that any invalidations that arrive
         * during or after the snapshot are not
         * removed by us.
         */
        atomic_or_32(&di_cache.cache_valid, 1);

        rval = di_snapshot_and_clean(st);

        /* A zero-sized snapshot indicates failure; nothing is cached */
        if (rval == 0) {
                CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
                return (0);
        }

        DI_ALL_PTR(st)->map_size = rval;
        /* Copy the snapshot from di_state into the in-core cache */
        if (di_mem2cache(st, &di_cache) == 0) {
                CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
                return (0);
        }

        ASSERT(di_cache.cache_data);
        ASSERT(di_cache.cache_size > 0);

        /*
         * Now that we have cached the snapshot, compute its checksum.
         * The checksum is only computed over the valid data in the
         * cache, not the entire cache.
         * Also, set all the fields (except checksum) before computing
         * checksum.
         */
        all = (struct di_all *)di_cache.cache_data;
        all->cache_magic = DI_CACHE_MAGIC;
        all->map_size = rval;

        ASSERT(all->cache_checksum == 0);
        CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
        all->cache_checksum = crc;

        /* Persist the freshly built cache; failures there are non-fatal */
        di_cache_write(&di_cache);

        /* Return the size of valid snapshot data */
        return (rval);
}
4232 
4233 static void
4234 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
4235 {
4236         va_list ap;
4237 
4238         if (di_cache_debug <= DI_QUIET)
4239                 return;
4240 
4241         if (di_cache_debug < msglevel)
4242                 return;
4243 
4244         switch (msglevel) {
4245                 case DI_ERR:
4246                         msglevel = CE_WARN;
4247                         break;
4248                 case DI_INFO:
4249                 case DI_TRACE:
4250                 default:
4251                         msglevel = CE_NOTE;
4252                         break;
4253         }
4254 
4255         va_start(ap, fmt);
4256         vcmn_err(msglevel, fmt, ap);
4257         va_end(ap);
4258 }
4259 
4260 static void
4261 di_hotplug_children(struct di_state *st)
4262 {
4263         di_off_t        off;
4264         struct di_hp    *hp;
4265         struct i_hp     *hp_list_node;
4266 
4267         while (hp_list_node = (struct i_hp *)list_remove_head(&st->hp_list)) {
4268 
4269                 if ((hp_list_node->hp_child != NULL) &&
4270                     (di_dip_find(st, hp_list_node->hp_child, &off) == 0)) {
4271                         hp = DI_HP(di_mem_addr(st, hp_list_node->hp_off));
4272                         hp->hp_child = off;
4273                 }
4274 
4275                 kmem_free(hp_list_node, sizeof (i_hp_t));
4276         }
4277 
4278         list_destroy(&st->hp_list);
4279 }