1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Nexenta Systems, Inc.
  25  */
  26 
  27 /*
  28  * miscellaneous routines for the devfs
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/t_lock.h>
  34 #include <sys/systm.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/user.h>
  37 #include <sys/time.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/file.h>
  41 #include <sys/fcntl.h>
  42 #include <sys/flock.h>
  43 #include <sys/kmem.h>
  44 #include <sys/uio.h>
  45 #include <sys/errno.h>
  46 #include <sys/stat.h>
  47 #include <sys/cred.h>
  48 #include <sys/dirent.h>
  49 #include <sys/pathname.h>
  50 #include <sys/cmn_err.h>
  51 #include <sys/debug.h>
  52 #include <sys/modctl.h>
  53 #include <fs/fs_subr.h>
  54 #include <sys/fs/dv_node.h>
  55 #include <sys/fs/snode.h>
  56 #include <sys/sunndi.h>
  57 #include <sys/sunmdi.h>
  58 #include <sys/conf.h>
  59 
  60 #ifdef DEBUG
  61 int devfs_debug = 0x0;
  62 #endif
  63 
  64 const char      dvnm[] = "devfs";
  65 kmem_cache_t    *dv_node_cache; /* dv_node cache */
  66 
  67 /*
  68  * The devfs_clean_key is taken during a devfs_clean operation: it is used to
  69  * prevent unnecessary code execution and for detection of potential deadlocks.
  70  */
  71 uint_t          devfs_clean_key;
  72 
  73 struct dv_node *dvroot;
  74 
  75 /* prototype memory vattrs */
  76 vattr_t dv_vattr_dir = {
  77         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
  78         VDIR,                                   /* va_type */
  79         DV_DIRMODE_DEFAULT,                     /* va_mode */
  80         DV_UID_DEFAULT,                         /* va_uid */
  81         DV_GID_DEFAULT,                         /* va_gid */
  82         0,                                      /* va_fsid; */
  83         0,                                      /* va_nodeid; */
  84         0,                                      /* va_nlink; */
  85         0,                                      /* va_size; */
  86         0,                                      /* va_atime; */
  87         0,                                      /* va_mtime; */
  88         0,                                      /* va_ctime; */
  89         0,                                      /* va_rdev; */
  90         0,                                      /* va_blksize; */
  91         0,                                      /* va_nblocks; */
  92         0,                                      /* va_seq; */
  93 };
  94 
  95 vattr_t dv_vattr_file = {
  96         AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,  /* va_mask */
  97         0,                                      /* va_type */
  98         DV_DEVMODE_DEFAULT,                     /* va_mode */
  99         DV_UID_DEFAULT,                         /* va_uid */
 100         DV_GID_DEFAULT,                         /* va_gid */
 101         0,                                      /* va_fsid; */
 102         0,                                      /* va_nodeid; */
 103         0,                                      /* va_nlink; */
 104         0,                                      /* va_size; */
 105         0,                                      /* va_atime; */
 106         0,                                      /* va_mtime; */
 107         0,                                      /* va_ctime; */
 108         0,                                      /* va_rdev; */
 109         0,                                      /* va_blksize; */
 110         0,                                      /* va_nblocks; */
 111         0,                                      /* va_seq; */
 112 };
 113 
 114 vattr_t dv_vattr_priv = {
 115         AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,  /* va_mask */
 116         0,                                      /* va_type */
 117         DV_DEVMODE_PRIV,                        /* va_mode */
 118         DV_UID_DEFAULT,                         /* va_uid */
 119         DV_GID_DEFAULT,                         /* va_gid */
 120         0,                                      /* va_fsid; */
 121         0,                                      /* va_nodeid; */
 122         0,                                      /* va_nlink; */
 123         0,                                      /* va_size; */
 124         0,                                      /* va_atime; */
 125         0,                                      /* va_mtime; */
 126         0,                                      /* va_ctime; */
 127         0,                                      /* va_rdev; */
 128         0,                                      /* va_blksize; */
 129         0,                                      /* va_nblocks; */
 130         0,                                      /* va_seq; */
 131 };
 132 
 133 extern dev_info_t       *clone_dip;
 134 extern major_t          clone_major;
 135 extern struct dev_ops   *ddi_hold_driver(major_t);
 136 
 137 /* dv_node node constructor for kmem cache */
 138 static int
 139 i_dv_node_ctor(void *buf, void *cfarg, int flag)
 140 {
 141         _NOTE(ARGUNUSED(cfarg, flag))
 142         struct dv_node  *dv = (struct dv_node *)buf;
 143         struct vnode    *vp;
 144 
 145         bzero(buf, sizeof (struct dv_node));
 146         vp = dv->dv_vnode = vn_alloc(flag);
 147         if (vp == NULL) {
 148                 return (-1);
 149         }
 150         vp->v_data = dv;
 151         rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL);
 152         return (0);
 153 }
 154 
 155 /* dv_node node destructor for kmem cache */
 156 static void
 157 i_dv_node_dtor(void *buf, void *arg)
 158 {
 159         _NOTE(ARGUNUSED(arg))
 160         struct dv_node  *dv = (struct dv_node *)buf;
 161         struct vnode    *vp = DVTOV(dv);
 162 
 163         rw_destroy(&dv->dv_contents);
 164         vn_invalid(vp);
 165         vn_free(vp);
 166 }
 167 
 168 
 169 /* initialize dv_node node cache */
 170 void
 171 dv_node_cache_init()
 172 {
 173         ASSERT(dv_node_cache == NULL);
 174         dv_node_cache = kmem_cache_create("dv_node_cache",
 175             sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor,
 176             NULL, NULL, NULL, 0);
 177 
 178         tsd_create(&devfs_clean_key, NULL);
 179 }
 180 
 181 /* destroy dv_node node cache */
 182 void
 183 dv_node_cache_fini()
 184 {
 185         ASSERT(dv_node_cache != NULL);
 186         kmem_cache_destroy(dv_node_cache);
 187         dv_node_cache = NULL;
 188 
 189         tsd_destroy(&devfs_clean_key);
 190 }
 191 
 192 /*
 193  * dv_mkino - Generate a unique inode number for devfs nodes.
 194  *
 195  * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32
 196  * bit non-LARGEFILE applications. This means that there is a requirement to
 197  * maintain the inode number as a 32 bit value or applications will have
 198  * stat(2) calls fail with EOVERFLOW.  We form a 32 bit inode number from the
 199  * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor
 200  *
 201  * To generate inode numbers for directories, we assume that we will never use
 202  * more than half the major space - this allows for ~8190 drivers. We use this
 203  * upper major number space to allocate inode numbers for directories by
 204  * encoding the major and instance into this space.
 205  *
 206  * We also skew the result so that inode 2 is reserved for the root of the file
 207  * system.
 208  *
 209  * As part of the future support for 64-bit dev_t APIs, the upper minor bits
 210  * should be folded into the high inode bits by adding the following code
 211  * after "ino |= 1":
 212  *
 213  * #if (L_BITSMINOR32 != L_BITSMINOR)
 214  *              |* fold overflow minor bits into high bits of inode number *|
 215  *              ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR;
 216  * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *|
 217  *
 218  * This way only applications that use devices that overflow their minor
 219  * space will have an application level impact.
 220  */
 221 static ino_t
 222 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev)
 223 {
 224         major_t         major;
 225         minor_t         minor;
 226         ino_t           ino;
 227         static int      warn;
 228 
 229         if (typ == VDIR) {
 230                 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major;
 231                 minor = ddi_get_instance(devi);
 232 
 233                 /* makedevice32 in high half of major number space */
 234                 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
 235 
 236                 major = DEVI(devi)->devi_major;
 237         } else {
 238                 major = getmajor(dev);
 239                 minor = getminor(dev);
 240 
 241                 /* makedevice32 */
 242                 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
 243 
 244                 /* make ino for VCHR different than VBLK */
 245                 ino <<= 1;
 246                 if (typ == VCHR)
 247                         ino |= 1;
 248         }
 249 
 250         ino += DV_ROOTINO + 1;          /* skew */
 251 
 252         /*
 253          * diagnose things a little early because adding the skew to a large
 254          * minor number could roll over the major.
 255          */
 256         if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) {
 257                 warn = 1;
 258                 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm);
 259         }
 260 
 261         return (ino);
 262 }
 263 
 264 /*
 265  * Compare two nodes lexographically to balance avl tree
 266  */
 267 static int
 268 dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2)
 269 {
 270         int     rv;
 271 
 272         if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0)
 273                 return (0);
 274         return ((rv < 0) ? -1 : 1);
 275 }
 276 
 277 /*
 278  * dv_mkroot
 279  *
 280  * Build the first VDIR dv_node.
 281  */
 282 struct dv_node *
 283 dv_mkroot(struct vfs *vfsp, dev_t devfsdev)
 284 {
 285         struct dv_node  *dv;
 286         struct vnode    *vp;
 287 
 288         ASSERT(ddi_root_node() != NULL);
 289         ASSERT(dv_node_cache != NULL);
 290 
 291         dcmn_err3(("dv_mkroot\n"));
 292         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 293         vp = DVTOV(dv);
 294         vn_reinit(vp);
 295         vp->v_flag = VROOT;
 296         vp->v_vfsp = vfsp;
 297         vp->v_type = VDIR;
 298         vp->v_rdev = devfsdev;
 299         vn_setops(vp, dv_vnodeops);
 300         vn_exists(vp);
 301 
 302         dvroot = dv;
 303 
 304         dv->dv_name = NULL;          /* not needed */
 305         dv->dv_namelen = 0;
 306 
 307         dv->dv_devi = ddi_root_node();
 308 
 309         dv->dv_ino = DV_ROOTINO;
 310         dv->dv_nlink = 2;            /* name + . (no dv_insert) */
 311         dv->dv_dotdot = dv;          /* .. == self */
 312         dv->dv_attrvp = NULLVP;
 313         dv->dv_attr = NULL;
 314         dv->dv_flags = DV_BUILD;
 315         dv->dv_priv = NULL;
 316         dv->dv_busy = 0;
 317         dv->dv_dflt_mode = 0;
 318 
 319         avl_create(&dv->dv_entries,
 320             (int (*)(const void *, const void *))dv_compare_nodes,
 321             sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
 322 
 323         return (dv);
 324 }
 325 
 326 /*
 327  * dv_mkdir
 328  *
 329  * Given an probed or attached nexus node, create a VDIR dv_node.
 330  * No dv_attrvp is created at this point.
 331  */
 332 struct dv_node *
 333 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm)
 334 {
 335         struct dv_node  *dv;
 336         struct vnode    *vp;
 337         size_t          nmlen;
 338 
 339         ASSERT((devi));
 340         dcmn_err4(("dv_mkdir: %s\n", nm));
 341 
 342         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 343         nmlen = strlen(nm) + 1;
 344         dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
 345         bcopy(nm, dv->dv_name, nmlen);
 346         dv->dv_namelen = nmlen - 1;  /* '\0' not included */
 347 
 348         vp = DVTOV(dv);
 349         vn_reinit(vp);
 350         vp->v_flag = 0;
 351         vp->v_vfsp = DVTOV(ddv)->v_vfsp;
 352         vp->v_type = VDIR;
 353         vp->v_rdev = DVTOV(ddv)->v_rdev;
 354         vn_setops(vp, vn_getops(DVTOV(ddv)));
 355         vn_exists(vp);
 356 
 357         dv->dv_devi = devi;
 358         ndi_hold_devi(devi);
 359 
 360         dv->dv_ino = dv_mkino(devi, VDIR, NODEV);
 361         dv->dv_nlink = 0;            /* updated on insert */
 362         dv->dv_dotdot = ddv;
 363         dv->dv_attrvp = NULLVP;
 364         dv->dv_attr = NULL;
 365         dv->dv_flags = DV_BUILD;
 366         dv->dv_priv = NULL;
 367         dv->dv_busy = 0;
 368         dv->dv_dflt_mode = 0;
 369 
 370         avl_create(&dv->dv_entries,
 371             (int (*)(const void *, const void *))dv_compare_nodes,
 372             sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
 373 
 374         return (dv);
 375 }
 376 
 377 /*
 378  * dv_mknod
 379  *
 380  * Given a minor node, create a VCHR or VBLK dv_node.
 381  * No dv_attrvp is created at this point.
 382  */
 383 static struct dv_node *
 384 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm,
 385     struct ddi_minor_data *dmd)
 386 {
 387         struct dv_node  *dv;
 388         struct vnode    *vp;
 389         size_t          nmlen;
 390 
 391         dcmn_err4(("dv_mknod: %s\n", nm));
 392 
 393         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 394         nmlen = strlen(nm) + 1;
 395         dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
 396         bcopy(nm, dv->dv_name, nmlen);
 397         dv->dv_namelen = nmlen - 1;  /* no '\0' */
 398 
 399         vp = DVTOV(dv);
 400         vn_reinit(vp);
 401         vp->v_flag = 0;
 402         vp->v_vfsp = DVTOV(ddv)->v_vfsp;
 403         vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK;
 404         vp->v_rdev = dmd->ddm_dev;
 405         vn_setops(vp, vn_getops(DVTOV(ddv)));
 406         vn_exists(vp);
 407 
 408         ASSERT(DEVI_BUSY_OWNED(devi));
 409         ndi_hold_devi(devi);
 410 
 411         dv->dv_devi = devi;
 412         dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev);
 413         dv->dv_nlink = 0;            /* updated on insert */
 414         dv->dv_dotdot = ddv;
 415         dv->dv_attrvp = NULLVP;
 416         dv->dv_attr = NULL;
 417         dv->dv_flags = 0;
 418 
 419         if (dmd->type == DDM_INTERNAL_PATH)
 420                 dv->dv_flags |= DV_INTERNAL;
 421         if (dmd->ddm_flags & DM_NO_FSPERM)
 422                 dv->dv_flags |= DV_NO_FSPERM;
 423 
 424         dv->dv_priv = dmd->ddm_node_priv;
 425         if (dv->dv_priv)
 426                 dphold(dv->dv_priv);
 427 
 428         /*
 429          * Minors created with ddi_create_priv_minor_node can specify
 430          * a default mode permission other than the devfs default.
 431          */
 432         if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) {
 433                 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n",
 434                     dv->dv_name, dmd->ddm_priv_mode));
 435                 dv->dv_flags |= DV_DFLT_MODE;
 436                 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB;
 437         }
 438 
 439         return (dv);
 440 }
 441 
 442 /*
 443  * dv_destroy
 444  *
 445  * Destroy what we created in dv_mkdir or dv_mknod.
 446  * In the case of a *referenced* directory, do nothing.
 447  */
 448 void
 449 dv_destroy(struct dv_node *dv, uint_t flags)
 450 {
 451         vnode_t *vp = DVTOV(dv);
 452         ASSERT(dv->dv_nlink == 0);           /* no references */
 453 
 454         dcmn_err4(("dv_destroy: %s\n", dv->dv_name));
 455 
 456         /*
 457          * We may be asked to unlink referenced directories.
 458          * In this case, there is nothing to be done.
 459          * The eventual memory free will be done in
 460          * devfs_inactive.
 461          */
 462         if (vp->v_count != 0) {
 463                 ASSERT(vp->v_type == VDIR);
 464                 ASSERT(flags & DV_CLEAN_FORCE);
 465                 ASSERT(DV_STALE(dv));
 466                 return;
 467         }
 468 
 469         if (vp->v_type == VDIR) {
 470                 ASSERT(DV_FIRST_ENTRY(dv) == NULL);
 471                 avl_destroy(&dv->dv_entries);
 472         }
 473 
 474         if (dv->dv_attrvp != NULLVP)
 475                 VN_RELE(dv->dv_attrvp);
 476         if (dv->dv_attr != NULL)
 477                 kmem_free(dv->dv_attr, sizeof (struct vattr));
 478         if (dv->dv_name != NULL)
 479                 kmem_free(dv->dv_name, dv->dv_namelen + 1);
 480         if (dv->dv_devi != NULL) {
 481                 ndi_rele_devi(dv->dv_devi);
 482         }
 483         if (dv->dv_priv != NULL) {
 484                 dpfree(dv->dv_priv);
 485         }
 486 
 487         kmem_cache_free(dv_node_cache, dv);
 488 }
 489 
 490 /*
 491  * Find and hold dv_node by name
 492  */
 493 static struct dv_node *
 494 dv_findbyname(struct dv_node *ddv, char *nm)
 495 {
 496         struct dv_node  *dv;
 497         avl_index_t     where;
 498         struct dv_node  dvtmp;
 499 
 500         ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
 501         dcmn_err3(("dv_findbyname: %s\n", nm));
 502 
 503         dvtmp.dv_name = nm;
 504         dv = avl_find(&ddv->dv_entries, &dvtmp, &where);
 505         if (dv) {
 506                 ASSERT(dv->dv_dotdot == ddv);
 507                 ASSERT(strcmp(dv->dv_name, nm) == 0);
 508                 VN_HOLD(DVTOV(dv));
 509                 return (dv);
 510         }
 511         return (NULL);
 512 }
 513 
 514 /*
 515  * Inserts a new dv_node in a parent directory
 516  */
 517 void
 518 dv_insert(struct dv_node *ddv, struct dv_node *dv)
 519 {
 520         avl_index_t     where;
 521 
 522         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
 523         ASSERT(DVTOV(ddv)->v_type == VDIR);
 524         ASSERT(ddv->dv_nlink >= 2);
 525         ASSERT(dv->dv_nlink == 0);
 526 
 527         dcmn_err3(("dv_insert: %s\n", dv->dv_name));
 528 
 529         dv->dv_dotdot = ddv;
 530         if (DVTOV(dv)->v_type == VDIR) {
 531                 ddv->dv_nlink++;     /* .. to containing directory */
 532                 dv->dv_nlink = 2;    /* name + . */
 533         } else {
 534                 dv->dv_nlink = 1;    /* name */
 535         }
 536 
 537         /* enter node in the avl tree */
 538         VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL);
 539         avl_insert(&ddv->dv_entries, dv, where);
 540 }
 541 
 542 /*
 543  * Unlink a dv_node from a perent directory
 544  */
 545 void
 546 dv_unlink(struct dv_node *ddv, struct dv_node *dv)
 547 {
 548         /* verify linkage of arguments */
 549         ASSERT(ddv && dv);
 550         ASSERT(dv->dv_dotdot == ddv);
 551         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
 552         ASSERT(DVTOV(ddv)->v_type == VDIR);
 553 
 554         dcmn_err3(("dv_unlink: %s\n", dv->dv_name));
 555 
 556         if (DVTOV(dv)->v_type == VDIR) {
 557                 ddv->dv_nlink--;     /* .. to containing directory */
 558                 dv->dv_nlink -= 2;   /* name + . */
 559         } else {
 560                 dv->dv_nlink -= 1;   /* name */
 561         }
 562         ASSERT(ddv->dv_nlink >= 2);
 563         ASSERT(dv->dv_nlink == 0);
 564 
 565         dv->dv_dotdot = NULL;
 566 
 567         /* remove from avl tree */
 568         avl_remove(&ddv->dv_entries, dv);
 569 }
 570 
 571 /*
 572  * Merge devfs node specific information into an attribute structure.
 573  *
 574  * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node.
 575  */
 576 void
 577 dv_vattr_merge(struct dv_node *dv, struct vattr *vap)
 578 {
 579         struct vnode    *vp = DVTOV(dv);
 580 
 581         vap->va_nodeid = dv->dv_ino;
 582         vap->va_nlink = dv->dv_nlink;
 583 
 584         if (vp->v_type == VDIR) {
 585                 vap->va_rdev = 0;
 586                 vap->va_fsid = vp->v_rdev;
 587         } else {
 588                 vap->va_rdev = vp->v_rdev;
 589                 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev;
 590                 vap->va_type = vp->v_type;
 591                 /* don't trust the shadow file type */
 592                 vap->va_mode &= ~S_IFMT;
 593                 if (vap->va_type == VCHR)
 594                         vap->va_mode |= S_IFCHR;
 595                 else
 596                         vap->va_mode |= S_IFBLK;
 597         }
 598 }
 599 
 600 /*
 601  * Get default device permission by consulting rules in
 602  * privilege specification in minor node and /etc/minor_perm.
 603  *
 604  * This function is called from the devname filesystem to get default
 605  * permissions for a device exported to a non-global zone.
 606  */
 607 void
 608 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm)
 609 {
 610         mperm_t         mp;
 611         struct dv_node  *dv;
 612 
 613         /* If vp isn't a dv_node, return something sensible */
 614         if (!vn_matchops(vp, dv_vnodeops)) {
 615                 if (no_fs_perm)
 616                         *no_fs_perm = 0;
 617                 *vap = dv_vattr_file;
 618                 return;
 619         }
 620 
 621         /*
 622          * For minors not created by ddi_create_priv_minor_node(),
 623          * use devfs defaults.
 624          */
 625         dv = VTODV(vp);
 626         if (vp->v_type == VDIR) {
 627                 *vap = dv_vattr_dir;
 628         } else if (dv->dv_flags & DV_NO_FSPERM) {
 629                 if (no_fs_perm)
 630                         *no_fs_perm = 1;
 631                 *vap = dv_vattr_priv;
 632         } else {
 633                 /*
 634                  * look up perm bits from minor_perm
 635                  */
 636                 *vap = dv_vattr_file;
 637                 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) {
 638                         VATTR_MP_MERGE((*vap), mp);
 639                         dcmn_err5(("%s: minor perm mode 0%o\n",
 640                             dv->dv_name, vap->va_mode));
 641                 } else if (dv->dv_flags & DV_DFLT_MODE) {
 642                         ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0);
 643                         vap->va_mode &= ~S_IAMB;
 644                         vap->va_mode |= dv->dv_dflt_mode;
 645                         dcmn_err5(("%s: priv mode 0%o\n",
 646                             dv->dv_name, vap->va_mode));
 647                 }
 648         }
 649 }
 650 
 651 /*
 652  * dv_shadow_node
 653  *
 654  * Given a VDIR dv_node, find/create the associated VDIR
 655  * node in the shadow attribute filesystem.
 656  *
 657  * Given a VCHR/VBLK dv_node, find the associated VREG
 658  * node in the shadow attribute filesystem.  These nodes
 659  * are only created to persist non-default attributes.
 660  * Lack of such a node implies the default permissions
 661  * are sufficient.
 662  *
 663  * Managing the attribute file entries is slightly tricky (mostly
 664  * because we can't intercept VN_HOLD and VN_RELE except on the last
 665  * release).
 666  *
 667  * We assert that if the dv_attrvp pointer is non-NULL, it points
 668  * to a singly-held (by us) vnode that represents the shadow entry
 669  * in the underlying filesystem.  To avoid store-ordering issues,
 670  * we assert that the pointer can only be tested under the dv_contents
 671  * READERS lock.
 672  */
 673 
 674 void
 675 dv_shadow_node(
 676         struct vnode *dvp,      /* devfs parent directory vnode */
 677         char *nm,               /* name component */
 678         struct vnode *vp,       /* devfs vnode */
 679         struct pathname *pnp,   /* the path .. */
 680         struct vnode *rdir,     /* the root .. */
 681         struct cred *cred,      /* who's asking? */
 682         int flags)              /* optionally create shadow node */
 683 {
 684         struct dv_node  *dv;    /* dv_node of named directory */
 685         struct vnode    *rdvp;  /* shadow parent directory vnode */
 686         struct vnode    *rvp;   /* shadow vnode */
 687         struct vnode    *rrvp;  /* realvp of shadow vnode */
 688         struct vattr    vattr;
 689         int             create_tried;
 690         int             error;
 691 
 692         ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK);
 693         dv = VTODV(vp);
 694         dcmn_err3(("dv_shadow_node: name %s attr %p\n",
 695             nm, (void *)dv->dv_attrvp));
 696 
 697         if ((flags & DV_SHADOW_WRITE_HELD) == 0) {
 698                 ASSERT(RW_READ_HELD(&dv->dv_contents));
 699                 if (dv->dv_attrvp != NULLVP)
 700                         return;
 701                 if (!rw_tryupgrade(&dv->dv_contents)) {
 702                         rw_exit(&dv->dv_contents);
 703                         rw_enter(&dv->dv_contents, RW_WRITER);
 704                         if (dv->dv_attrvp != NULLVP) {
 705                                 rw_downgrade(&dv->dv_contents);
 706                                 return;
 707                         }
 708                 }
 709         } else {
 710                 ASSERT(RW_WRITE_HELD(&dv->dv_contents));
 711                 if (dv->dv_attrvp != NULLVP)
 712                         return;
 713         }
 714 
 715         ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL);
 716 
 717         rdvp = VTODV(dvp)->dv_attrvp;
 718         create_tried = 0;
 719 lookup:
 720         if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) {
 721                 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred,
 722                     NULL, NULL, NULL);
 723 
 724                 /* factor out the snode since we only want the attribute node */
 725                 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) {
 726                         VN_HOLD(rrvp);
 727                         VN_RELE(rvp);
 728                         rvp = rrvp;
 729                 }
 730         } else
 731                 error = EROFS;          /* no parent, no entry */
 732 
 733         /*
 734          * All we want is the permissions (and maybe ACLs and
 735          * extended attributes), and we want to perform lookups
 736          * by name.  Drivers occasionally change their minor
 737          * number space.  If something changes, there's no
 738          * much we can do about it here.
 739          */
 740 
 741         /* The shadow node checks out. We are done */
 742         if (error == 0) {
 743                 dv->dv_attrvp = rvp; /* with one hold */
 744 
 745                 /*
 746                  * Determine if we have non-trivial ACLs on this node.
 747                  * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial
 748                  * only does VOP_GETSECATTR.
 749                  */
 750                 dv->dv_flags &= ~DV_ACL;
 751 
 752                 if (fs_acl_nontrivial(rvp, cred))
 753                         dv->dv_flags |= DV_ACL;
 754 
 755                 /*
 756                  * If we have synced out the memory attributes, free
 757                  * them and switch back to using the persistent store.
 758                  */
 759                 if (rvp && dv->dv_attr) {
 760                         kmem_free(dv->dv_attr, sizeof (struct vattr));
 761                         dv->dv_attr = NULL;
 762                 }
 763                 if ((flags & DV_SHADOW_WRITE_HELD) == 0)
 764                         rw_downgrade(&dv->dv_contents);
 765                 ASSERT(RW_LOCK_HELD(&dv->dv_contents));
 766                 return;
 767         }
 768 
 769         /*
 770          * Failed to find attribute in persistent backing store,
 771          * get default permission bits.
 772          */
 773         devfs_get_defattr(vp, &vattr, NULL);
 774 
 775         dv_vattr_merge(dv, &vattr);
 776         gethrestime(&vattr.va_atime);
 777         vattr.va_mtime = vattr.va_atime;
 778         vattr.va_ctime = vattr.va_atime;
 779 
 780         /*
 781          * Try to create shadow dir. This is necessary in case
 782          * we need to create a shadow leaf node later, when user
 783          * executes chmod.
 784          */
 785         if ((error == ENOENT) && !create_tried) {
 786                 switch (vp->v_type) {
 787                 case VDIR:
 788                         error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred,
 789                             NULL, 0, NULL);
 790                         dsysdebug(error, ("vop_mkdir %s %s %d\n",
 791                             VTODV(dvp)->dv_name, nm, error));
 792                         create_tried = 1;
 793                         break;
 794 
 795                 case VCHR:
 796                 case VBLK:
 797                         /*
 798                          * Shadow nodes are only created on demand
 799                          */
 800                         if (flags & DV_SHADOW_CREATE) {
 801                                 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL,
 802                                     VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL);
 803                                 dsysdebug(error, ("vop_create %s %s %d\n",
 804                                     VTODV(dvp)->dv_name, nm, error));
 805                                 create_tried = 1;
 806                         }
 807                         break;
 808 
 809                 default:
 810                         cmn_err(CE_PANIC, "devfs: %s: create", dvnm);
 811                         /*NOTREACHED*/
 812                 }
 813 
 814                 if (create_tried &&
 815                     (error == 0) || (error == EEXIST)) {
 816                         VN_RELE(rvp);
 817                         goto lookup;
 818                 }
 819         }
 820 
 821         /* Store attribute in memory */
 822         if (dv->dv_attr == NULL) {
 823                 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP);
 824                 *(dv->dv_attr) = vattr;
 825         }
 826 
 827         if ((flags & DV_SHADOW_WRITE_HELD) == 0)
 828                 rw_downgrade(&dv->dv_contents);
 829         ASSERT(RW_LOCK_HELD(&dv->dv_contents));
 830 }
 831 
 832 /*
 833  * Given a devinfo node, and a name, returns the appropriate
 834  * minor information for that named node, if it exists.
 835  */
 836 static int
 837 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi)
 838 {
 839         struct ddi_minor_data   *dmd;
 840 
 841         ASSERT(i_ddi_devi_attached(devi));
 842 
 843         dcmn_err3(("dv_find_leafnode: %s\n", minor_nm));
 844         ASSERT(DEVI_BUSY_OWNED(devi));
 845         for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
 846 
 847                 /*
 848                  * Skip alias nodes and nodes without a name.
 849                  */
 850                 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL))
 851                         continue;
 852 
 853                 dcmn_err4(("dv_find_leafnode: (%s,%s)\n",
 854                     minor_nm, dmd->ddm_name));
 855                 if (strcmp(minor_nm, dmd->ddm_name) == 0) {
 856                         r_mi->ddm_dev = dmd->ddm_dev;
 857                         r_mi->ddm_spec_type = dmd->ddm_spec_type;
 858                         r_mi->type = dmd->type;
 859                         r_mi->ddm_flags = dmd->ddm_flags;
 860                         r_mi->ddm_node_priv = dmd->ddm_node_priv;
 861                         r_mi->ddm_priv_mode = dmd->ddm_priv_mode;
 862                         if (r_mi->ddm_node_priv)
 863                                 dphold(r_mi->ddm_node_priv);
 864                         return (0);
 865                 }
 866         }
 867 
 868         dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm));
 869         return (ENOENT);
 870 }
 871 
 872 /*
 873  * Special handling for clone node:
 874  *      Clone minor name is a driver name, the minor number will
 875  *      be the major number of the driver. There is no minor
 876  *      node under the clone driver, so we'll manufacture the
 877  *      dev_t.
 878  */
 879 static struct dv_node *
 880 dv_clone_mknod(struct dv_node *ddv, char *drvname)
 881 {
 882         major_t                 major;
 883         struct dv_node          *dvp;
 884         char                    *devnm;
 885         struct ddi_minor_data   *dmd;
 886 
 887         /*
 888          * Make sure drvname is a STREAMS driver. We load the driver,
 889          * but don't attach to any instances. This makes stat(2)
 890          * relatively cheap.
 891          */
 892         major = ddi_name_to_major(drvname);
 893         if (major == DDI_MAJOR_T_NONE)
 894                 return (NULL);
 895 
 896         if (ddi_hold_driver(major) == NULL)
 897                 return (NULL);
 898 
 899         if (STREAMSTAB(major) == NULL) {
 900                 ddi_rele_driver(major);
 901                 return (NULL);
 902         }
 903 
 904         ddi_rele_driver(major);
 905         devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 906         (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname);
 907         dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
 908         dmd->ddm_dev = makedevice(clone_major, (minor_t)major);
 909         dmd->ddm_spec_type = S_IFCHR;
 910         dvp = dv_mknod(ddv, clone_dip, devnm, dmd);
 911         kmem_free(dmd, sizeof (*dmd));
 912         kmem_free(devnm, MAXNAMELEN);
 913         return (dvp);
 914 }
 915 
/*
 * Given the parent directory node, and a name in it, returns the
 * named dv_node to the caller (as a vnode).
 *
 * (We need pnp and rdir for doing shadow lookups; they can be NULL)
 *
 * Arguments:
 *      ddv             parent directory dv_node
 *      nm              component name to look up; if it contains a ':'
 *                      the string is briefly modified in place (the ':'
 *                      is replaced by a NUL and then restored) around
 *                      the call to ndi_devi_config_one()
 *      vpp             out: the resulting vnode
 *      pnp, rdir       passed through to dv_shadow_node()
 *      cred            caller's credentials; anything other than kcred
 *                      is refused access to DV_INTERNAL nodes
 *      ndi_flags       extra flags for ndi_devi_config_one(); must not
 *                      include NDI_CONFIG
 *
 * Returns 0 on success with *vpp set.  For "" and "." this is the
 * directory itself, for ".." its dv_dotdot, each with a VN_HOLD.  For
 * VCHR/VBLK nodes *vpp is the vnode returned by specvp_devfs(), and
 * ENOSYS is returned if that is NULLVP.
 *
 * Errors:
 *      EBUSY   the calling thread has devfs_clean_key set and a
 *              dv_contents lock could not be taken or upgraded without
 *              blocking
 *      ESTALE  the directory node is stale (DV_STALE)
 *      ENOENT  the name starts with '@' or ':', the child could not be
 *              configured or created, the node is hidden, the node is
 *              a vhci client seen through a phci outside the miniroot,
 *              the node is DV_INTERNAL and cred is not kcred, or the
 *              node is absent while on the devfs_clean path
 */
int
dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp,
    struct vnode *rdir, struct cred *cred, uint_t ndi_flags)
{
        extern int isminiroot;  /* see modctl.c */

        int                     circ;
        int                     rv = 0, was_busy = 0, nmlen, write_held = 0;
        struct vnode            *vp;
        struct dv_node          *dv, *dup;
        dev_info_t              *pdevi, *devi = NULL;
        char                    *mnm;
        struct ddi_minor_data   *dmd;

        dcmn_err3(("dv_find %s\n", nm));

        /*
         * Threads on the devfs_clean path (devfs_clean_key set) must
         * never block on a contents lock; they fail with EBUSY instead.
         */
        if (!rw_tryenter(&ddv->dv_contents, RW_READER)) {
                if (tsd_get(devfs_clean_key))
                        return (EBUSY);
                rw_enter(&ddv->dv_contents, RW_READER);
        }
start:
        /*
         * We get back here (holding the lock as writer) if the upgrade
         * attempt further down had to drop and retake the lock.
         */
        if (DV_STALE(ddv)) {
                rw_exit(&ddv->dv_contents);
                return (ESTALE);
        }

        /*
         * Empty name or ., return node itself.
         */
        nmlen = strlen(nm);
        if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
                *vpp = DVTOV(ddv);
                rw_exit(&ddv->dv_contents);
                VN_HOLD(*vpp);
                return (0);
        }

        /*
         * .., return the parent directory
         */
        if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
                *vpp = DVTOV(ddv->dv_dotdot);
                rw_exit(&ddv->dv_contents);
                VN_HOLD(*vpp);
                return (0);
        }

        /*
         * Fail anything without a valid device name component
         */
        if (nm[0] == '@' || nm[0] == ':') {
                dcmn_err3(("devfs: no driver '%s'\n", nm));
                rw_exit(&ddv->dv_contents);
                return (ENOENT);
        }

        /*
         * So, now we have to deal with the trickier stuff.
         *
         * (a) search the existing list of dv_nodes on this directory
         *
         * NOTE(review): dv_findbyname() appears to return the node with
         * a hold on its vnode, since every failure path below drops one
         * with VN_RELE() -- confirm against dv_findbyname().
         */
        if ((dv = dv_findbyname(ddv, nm)) != NULL) {
founddv:
                /*
                 * Also reached by goto from case (b) below, once a newly
                 * built node has been inserted (or an existing duplicate
                 * adopted) with ddv->dv_contents held as writer.
                 */
                ASSERT(RW_LOCK_HELD(&ddv->dv_contents));

                if (!rw_tryenter(&dv->dv_contents, RW_READER)) {
                        if (tsd_get(devfs_clean_key)) {
                                VN_RELE(DVTOV(dv));
                                rw_exit(&ddv->dv_contents);
                                return (EBUSY);
                        }
                        rw_enter(&dv->dv_contents, RW_READER);
                }

                vp = DVTOV(dv);
                if ((dv->dv_attrvp != NULLVP) ||
                    (vp->v_type != VDIR && dv->dv_attr != NULL)) {
                        /*
                         * Common case - we already have attributes
                         */
                        rw_exit(&dv->dv_contents);
                        rw_exit(&ddv->dv_contents);
                        goto found;
                }

                /*
                 * No attribute vp, try and build one.
                 *
                 * dv_shadow_node() can briefly drop &dv->dv_contents lock
                 * if it is unable to upgrade it to a write lock. If the
                 * current thread has come in through the bottom-up device
                 * configuration devfs_clean() path, we may deadlock against
                 * a thread performing top-down device configuration if it
                 * grabs the contents lock. To avoid this, when we are on the
                 * devfs_clean() path we attempt to upgrade the dv_contents
                 * lock before we call dv_shadow_node().
                 */
                if (tsd_get(devfs_clean_key)) {
                        if (!rw_tryupgrade(&dv->dv_contents)) {
                                VN_RELE(DVTOV(dv));
                                rw_exit(&dv->dv_contents);
                                rw_exit(&ddv->dv_contents);
                                return (EBUSY);
                        }

                        /* tell dv_shadow_node() the lock is already WRITER */
                        write_held = DV_SHADOW_WRITE_HELD;
                }

                dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred,
                    write_held);

                rw_exit(&dv->dv_contents);
                rw_exit(&ddv->dv_contents);
                goto found;
        }

        /*
         * (b) Search the child devinfo nodes of our parent directory,
         * looking for the named node.  If we find it, build a new
         * node, then grab the writers lock, search the directory
         * if it's still not there, then insert it.
         *
         * We drop the devfs locks before accessing the device tree.
         * Take care to mark the node BUSY so that a forced devfs_clean
         * doesn't mark the directory node stale.
         *
         * Also, check if we are called as part of devfs_clean or
         * reset_perm. If so, simply return not found because there
         * is nothing to clean.
         */
        if (tsd_get(devfs_clean_key)) {
                rw_exit(&ddv->dv_contents);
                return (ENOENT);
        }

        /*
         * We could be either READ or WRITE locked at
         * this point. Upgrade if we are read locked.
         */
        ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
        if (rw_read_locked(&ddv->dv_contents) &&
            !rw_tryupgrade(&ddv->dv_contents)) {
                rw_exit(&ddv->dv_contents);
                rw_enter(&ddv->dv_contents, RW_WRITER);
                /*
                 * Things may have changed when we dropped
                 * the contents lock, so start from top again
                 */
                goto start;
        }
        ddv->dv_busy++;              /* mark busy before dropping lock */
        was_busy++;                  /* remember to undo it at notfound: */
        rw_exit(&ddv->dv_contents);

        pdevi = ddv->dv_devi;
        ASSERT(pdevi != NULL);

        /*
         * Temporarily cut the name at the minor separator so that only
         * the "name@addr" part is handed to ndi_devi_config_one(); the
         * ':' is put back right after the call.
         */
        mnm = strchr(nm, ':');
        if (mnm)
                *mnm = (char)0;

        /*
         * Configure one nexus child, will call nexus's bus_ops
         * If successful, devi is held upon returning.
         * Note: devfs lookup should not be configuring grandchildren.
         */
        ASSERT((ndi_flags & NDI_CONFIG) == 0);

        rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT);
        if (mnm)
                *mnm = ':';
        if (rv != NDI_SUCCESS) {
                rv = ENOENT;
                goto notfound;
        }

        ASSERT(devi);

        /*
         * Check if this is a path alias: the configured node lives under
         * a different parent, so look it up by its real pathname.
         */
        if (ddi_aliases_present == B_TRUE && ddi_get_parent(devi) != pdevi) {
                char *curr = kmem_alloc(MAXPATHLEN, KM_SLEEP);

                (void) ddi_pathname(devi, curr);

                vp = NULL;
                if (devfs_lookupname(curr, NULL, &vp) == 0 && vp) {
                        dv = VTODV(vp);
                        kmem_free(curr, MAXPATHLEN);
                        goto found;
                }
                kmem_free(curr, MAXPATHLEN);
        }

        /*
         * If we configured a hidden node, consider it notfound.
         */
        if (ndi_dev_is_hidden_node(devi)) {
                ndi_rele_devi(devi);
                rv = ENOENT;
                goto notfound;
        }

        /*
         * Don't make vhci clients visible under phci, unless we
         * are in miniroot.
         */
        if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) {
                ndi_rele_devi(devi);
                rv = ENOENT;
                goto notfound;
        }

        ASSERT(devi && i_ddi_devi_attached(devi));

        /*
         * Invalidate cache to notice newly created minor nodes.
         */
        rw_enter(&ddv->dv_contents, RW_WRITER);
        ddv->dv_flags |= DV_BUILD;
        rw_exit(&ddv->dv_contents);

        /*
         * mkdir for nexus drivers and leaf nodes as well.  If we are racing
         * and create a duplicate, the duplicate will be destroyed below.
         *
         * dv is still NULL on entry here (the dv_findbyname() above
         * failed), and stays NULL if no matching minor is found.
         */
        if (mnm == NULL) {
                dv = dv_mkdir(ddv, devi, nm);
        } else {
                /*
                 * Allocate dmd first to avoid KM_SLEEP with active
                 * ndi_devi_enter.
                 */
                dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
                ndi_devi_enter(devi, &circ);
                if (devi == clone_dip) {
                        /*
                         * For clone minors, load the driver indicated by
                         * minor name.
                         */
                        dv = dv_clone_mknod(ddv, mnm + 1);
                } else {
                        /*
                         * Find minor node and make a dv_node.  The
                         * dpfree() pairs with the dphold() done by
                         * dv_find_leafnode() on ddm_node_priv.
                         */
                        if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) {
                                dv = dv_mknod(ddv, devi, nm, dmd);
                                if (dmd->ddm_node_priv)
                                        dpfree(dmd->ddm_node_priv);
                        }
                }
                ndi_devi_exit(devi, circ);
                kmem_free(dmd, sizeof (*dmd));
        }
        /*
         * Release hold from ndi_devi_config_one()
         */
        ndi_rele_devi(devi);

        if (dv == NULL) {
                rv = ENOENT;
                goto notfound;
        }

        /*
         * We have released the dv_contents lock, need to check
         * if another thread already created a duplicate node
         */
        rw_enter(&ddv->dv_contents, RW_WRITER);
        if ((dup = dv_findbyname(ddv, nm)) == NULL) {
                dv_insert(ddv, dv);
        } else {
                /*
                 * Duplicate found, use the existing node
                 */
                VN_RELE(DVTOV(dv));
                dv_destroy(dv, 0);
                dv = dup;
        }
        /* ddv->dv_contents is held as writer, as founddv expects */
        goto founddv;
        /*NOTREACHED*/

found:
        /*
         * No devfs locks are held here.  dv/vp identify the node that
         * was looked up; the failure paths below drop a reference on vp.
         */

        /*
         * Fail lookup of device that has now become hidden (typically via
         * hot removal of open device).
         */
        if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) {
                dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm));
                VN_RELE(vp);
                rv = ENOENT;
                goto notfound;
        }

        /*
         * Skip non-kernel lookups of internal nodes.
         * This use of kcred to distinguish between user and
         * internal kernel lookups is unfortunate.  The information
         * provided by the seg argument to lookupnameat should
         * evolve into a lookup flag for filesystems that need
         * this distinction.
         */
        if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) {
                dcmn_err2(("dv_find: nm %s failed: internal\n", nm));
                VN_RELE(vp);
                rv = ENOENT;
                goto notfound;
        }

        dcmn_err2(("dv_find: returning vp for nm %s\n", nm));
        if (vp->v_type == VCHR || vp->v_type == VBLK) {
                /*
                 * If vnode is a device, return special vnode instead
                 * (though it knows all about -us- via sp->s_realvp,
                 * sp->s_devvp, and sp->s_dip)
                 */
                *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred,
                    dv->dv_devi);
                VN_RELE(vp);
                if (*vpp == NULLVP)
                        rv = ENOSYS;
        } else
                *vpp = vp;

notfound:
        /*
         * Common exit, shared by the success path: undo the dv_busy
         * mark taken in case (b), if any, and return rv.
         */
        if (was_busy) {
                /*
                 * Non-zero was_busy tells us that we are not in the
                 * devfs_clean() path which in turn means that we can afford
                 * to take the contents lock unconditionally.
                 */
                rw_enter(&ddv->dv_contents, RW_WRITER);
                ddv->dv_busy--;
                rw_exit(&ddv->dv_contents);
        }
        return (rv);
}
1259 
1260 /*
1261  * The given directory node is out-of-date; that is, it has been
1262  * marked as needing to be rebuilt, possibly because some new devinfo
1263  * node has come into existence, or possibly because this is the first
1264  * time we've been here.
1265  */
1266 void
1267 dv_filldir(struct dv_node *ddv)
1268 {
1269         struct dv_node          *dv;
1270         dev_info_t              *devi, *pdevi;
1271         struct ddi_minor_data   *dmd;
1272         char                    devnm[MAXNAMELEN];
1273         int                     circ, ccirc;
1274 
1275         ASSERT(DVTOV(ddv)->v_type == VDIR);
1276         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
1277         ASSERT(ddv->dv_flags & DV_BUILD);
1278 
1279         dcmn_err3(("dv_filldir: %s\n", ddv->dv_name));
1280         if (DV_STALE(ddv))
1281                 return;
1282         pdevi = ddv->dv_devi;
1283 
1284         if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) {
1285                 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name));
1286         }
1287 
1288         ndi_devi_enter(pdevi, &circ);
1289         for (devi = ddi_get_child(pdevi); devi;
1290             devi = ddi_get_next_sibling(devi)) {
1291                 /*
1292                  * While we know enough to create a directory at DS_INITIALIZED,
1293                  * the directory will be empty until DS_ATTACHED. The existence
1294                  * of an empty directory dv_node will cause a devi_ref, which
1295                  * has caused problems for existing code paths doing offline/DR
1296                  * type operations - making devfs_clean coordination even more
1297                  * sensitive and error prone. Given this, the 'continue' below
1298                  * is checking for DS_ATTACHED instead of DS_INITIALIZED.
1299                  */
1300                 if (i_ddi_node_state(devi) < DS_ATTACHED)
1301                         continue;
1302 
1303                 /* skip hidden nodes */
1304                 if (ndi_dev_is_hidden_node(devi))
1305                         continue;
1306 
1307                 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi)));
1308 
1309                 ndi_devi_enter(devi, &ccirc);
1310                 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
1311                         char *addr;
1312 
1313                         /*
1314                          * Skip alias nodes, internal nodes, and nodes
1315                          * without a name.  We allow DDM_DEFAULT nodes
1316                          * to appear in readdir.
1317                          */
1318                         if ((dmd->type == DDM_ALIAS) ||
1319                             (dmd->type == DDM_INTERNAL_PATH) ||
1320                             (dmd->ddm_name == NULL))
1321                                 continue;
1322 
1323                         addr = ddi_get_name_addr(devi);
1324                         if (addr && *addr)
1325                                 (void) sprintf(devnm, "%s@%s:%s",
1326                                     ddi_node_name(devi), addr, dmd->ddm_name);
1327                         else
1328                                 (void) sprintf(devnm, "%s:%s",
1329                                     ddi_node_name(devi), dmd->ddm_name);
1330 
1331                         if ((dv = dv_findbyname(ddv, devnm)) != NULL) {
1332                                 /* dv_node already exists */
1333                                 VN_RELE(DVTOV(dv));
1334                                 continue;
1335                         }
1336 
1337                         dv = dv_mknod(ddv, devi, devnm, dmd);
1338                         dv_insert(ddv, dv);
1339                         VN_RELE(DVTOV(dv));
1340                 }
1341                 ndi_devi_exit(devi, ccirc);
1342 
1343                 (void) ddi_deviname(devi, devnm);
1344                 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) {
1345                         /* directory doesn't exist */
1346                         dv = dv_mkdir(ddv, devi, devnm + 1);
1347                         dv_insert(ddv, dv);
1348                 }
1349                 VN_RELE(DVTOV(dv));
1350         }
1351         ndi_devi_exit(pdevi, circ);
1352 
1353         ddv->dv_flags &= ~DV_BUILD;
1354 }
1355 
/*
 * Given a directory node, clean out all the nodes beneath.
 *
 * VDIR:        Reinvoke to clean them, then delete the directory.
 * VCHR, VBLK:  Just blow them away.
 *
 * Mark the directories touched as in need of a rebuild, in case
 * we fall over part way through. When DV_CLEAN_FORCE is specified,
 * we mark referenced empty directories as stale to facilitate DR.
 *
 * Arguments:
 *      ddv     directory to clean
 *      devnm   if non-NULL, only entries whose name is devnm, or devnm
 *              followed by ":<minor>", are cleaned, and the walk stops
 *              at the first busy entry
 *      flags   DV_CLEAN_FORCE as described above; DV_CLEANDIR_LCK means
 *              the caller already holds ddv->dv_contents as writer, so
 *              it is neither taken nor dropped here.  flags is also
 *              passed on to dv_destroy().
 *
 * Returns 0 if everything selected was cleaned, or EBUSY if the
 * contents lock could not be taken without blocking or at least one
 * entry was busy.
 */
int
dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags)
{
        struct dv_node  *dv;
        struct dv_node  *next;
        struct vnode    *vp;
        int             busy = 0;

        /*
         * We should always have devfs_clean_key set here: dv_cleandir()
         * will be called as a result of a devfs_clean request and the
         * devfs_clean_key will be set either in devfs_clean() itself or in
         * devfs_clean_vhci().
         *
         * Since we are on the devfs_clean path, we return EBUSY if we cannot
         * get the contents lock: if we blocked here we might deadlock against
         * a thread performing top-down device configuration.
         */
        ASSERT(tsd_get(devfs_clean_key));

        dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name));

        if (!(flags & DV_CLEANDIR_LCK) &&
            !rw_tryenter(&ddv->dv_contents, RW_WRITER))
                return (EBUSY);

        /*
         * The successor is fetched up front because dv may be unlinked
         * and destroyed before the end of the iteration.
         */
        for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) {
                next = DV_NEXT_ENTRY(ddv, dv);

                /*
                 * If devnm is specified, the non-minor portion of the
                 * name must match devnm.
                 */
                if (devnm &&
                    (strncmp(devnm, dv->dv_name, strlen(devnm)) ||
                    (dv->dv_name[strlen(devnm)] != ':' &&
                    dv->dv_name[strlen(devnm)] != '\0')))
                        continue;

                /* check type of what we are cleaning */
                vp = DVTOV(dv);
                if (vp->v_type == VDIR) {
                        /*
                         * Recurse on directories.  We take the child's
                         * contents lock ourselves and tell the recursive
                         * call so with DV_CLEANDIR_LCK.
                         */
                        rw_enter(&dv->dv_contents, RW_WRITER);
                        if (dv_cleandir(dv, NULL,
                            flags | DV_CLEANDIR_LCK) == EBUSY) {
                                rw_exit(&dv->dv_contents);
                                goto set_busy;
                        }

                        /* A clean directory is an empty directory... */
                        ASSERT(dv->dv_nlink == 2);
                        mutex_enter(&vp->v_lock);
                        if (vp->v_count > 0) {
                                /*
                                 * ... but an empty directory can still have
                                 * references to it. If we have dv_busy or
                                 * DV_CLEAN_FORCE is *not* specified then a
                                 * referenced directory is considered busy.
                                 */
                                if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) {
                                        mutex_exit(&vp->v_lock);
                                        rw_exit(&dv->dv_contents);
                                        goto set_busy;
                                }

                                /*
                                 * Mark referenced directory stale so that DR
                                 * will succeed even if a shell has
                                 * /devices/xxx as current directory (causing
                                 * VN_HOLD reference to an empty directory).
                                 */
                                ASSERT(!DV_STALE(dv));
                                ndi_rele_devi(dv->dv_devi);
                                dv->dv_devi = NULL;  /* mark DV_STALE */
                        }
                } else {
                        ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
                        ASSERT(dv->dv_nlink == 1);   /* no hard links */
                        mutex_enter(&vp->v_lock);
                        if (vp->v_count > 0) {
                                /*
                                 * The file still has references to it.  If
                                 * the devi it refers to is *not* marked gone
                                 * (DEVI_IS_GONE) then a referenced file is
                                 * considered busy.
                                 */
                                if (!DEVI_IS_GONE(dv->dv_devi)) {
                                        mutex_exit(&vp->v_lock);
                                        goto set_busy;
                                }

                                /*
                                 * Mark referenced file stale so that DR will
                                 * succeed even if there are userland opens.
                                 */
                                ASSERT(!DV_STALE(dv));
                                ndi_rele_devi(dv->dv_devi);
                                dv->dv_devi = NULL;  /* mark DV_STALE */
                        }
                }

                /*
                 * Unlink from directory.  At this point vp->v_lock is
                 * held, and for a directory so is dv->dv_contents.
                 */
                dv_unlink(ddv, dv);

                /* drop locks */
                mutex_exit(&vp->v_lock);
                if (vp->v_type == VDIR)
                        rw_exit(&dv->dv_contents);

                /*
                 * Destroy vnode if ref count is zero.  A node that is
                 * still referenced stays unlinked and stale.
                 */
                if (vp->v_count == 0)
                        dv_destroy(dv, flags);

                continue;

                /*
                 * If devnm is not NULL we return immediately on busy,
                 * otherwise we continue destroying unused dv_node's.
                 *
                 * Only reached via goto, with all of dv's locks already
                 * dropped.
                 */
set_busy:       busy++;
                if (devnm)
                        break;
        }

        /*
         * This code may be invoked to inform devfs that a new node has
         * been created in the kernel device tree. So we always set
         * the DV_BUILD flag to allow the next dv_filldir() to pick
         * the new devinfo nodes.
         */
        ddv->dv_flags |= DV_BUILD;

        if (!(flags & DV_CLEANDIR_LCK))
                rw_exit(&ddv->dv_contents);

        return (busy ? EBUSY : 0);
}
1503 
1504 /*
1505  * Walk through the devfs hierarchy, correcting the permissions of
1506  * devices with default permissions that do not match those specified
1507  * by minor perm.  This can only be done for all drivers for now.
1508  */
1509 static int
1510 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags)
1511 {
1512         struct dv_node  *dv;
1513         struct vnode    *vp;
1514         int             retval = 0;
1515         struct vattr    *attrp;
1516         mperm_t         mp;
1517         char            *nm;
1518         uid_t           old_uid;
1519         gid_t           old_gid;
1520         mode_t          old_mode;
1521 
1522         rw_enter(&ddv->dv_contents, RW_WRITER);
1523         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1524                 int error = 0;
1525                 nm = dv->dv_name;
1526 
1527                 rw_enter(&dv->dv_contents, RW_READER);
1528                 vp = DVTOV(dv);
1529                 if (vp->v_type == VDIR) {
1530                         rw_exit(&dv->dv_contents);
1531                         if (dv_reset_perm_dir(dv, flags) != 0) {
1532                                 error = EBUSY;
1533                         }
1534                 } else {
1535                         ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
1536 
1537                         /*
1538                          * Check for permissions from minor_perm
1539                          * If there are none, we're done
1540                          */
1541                         rw_exit(&dv->dv_contents);
1542                         if (dev_minorperm(dv->dv_devi, nm, &mp) != 0)
1543                                 continue;
1544 
1545                         rw_enter(&dv->dv_contents, RW_READER);
1546 
1547                         /*
1548                          * Allow a node's permissions to be altered
1549                          * permanently from the defaults by chmod,
1550                          * using the shadow node as backing store.
1551                          * Otherwise, update node to minor_perm permissions.
1552                          */
1553                         if (dv->dv_attrvp == NULLVP) {
1554                                 /*
1555                                  * No attribute vp, try to find one.
1556                                  */
1557                                 dv_shadow_node(DVTOV(ddv), nm, vp,
1558                                     NULL, NULLVP, kcred, 0);
1559                         }
1560                         if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) {
1561                                 rw_exit(&dv->dv_contents);
1562                                 continue;
1563                         }
1564 
1565                         attrp = dv->dv_attr;
1566 
1567                         if (VATTRP_MP_CMP(attrp, mp) == 0) {
1568                                 dcmn_err5(("%s: no perm change: "
1569                                     "%d %d 0%o\n", nm, attrp->va_uid,
1570                                     attrp->va_gid, attrp->va_mode));
1571                                 rw_exit(&dv->dv_contents);
1572                                 continue;
1573                         }
1574 
1575                         old_uid = attrp->va_uid;
1576                         old_gid = attrp->va_gid;
1577                         old_mode = attrp->va_mode;
1578 
1579                         VATTRP_MP_MERGE(attrp, mp);
1580                         mutex_enter(&vp->v_lock);
1581                         if (vp->v_count > 0) {
1582                                 error = EBUSY;
1583                         }
1584                         mutex_exit(&vp->v_lock);
1585 
1586                         dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n",
1587                             nm, old_uid, old_gid, old_mode, attrp->va_uid,
1588                             attrp->va_gid, attrp->va_mode, error));
1589 
1590                         rw_exit(&dv->dv_contents);
1591                 }
1592 
1593                 if (error != 0) {
1594                         retval = error;
1595                 }
1596         }
1597 
1598         ddv->dv_flags |= DV_BUILD;
1599 
1600         rw_exit(&ddv->dv_contents);
1601 
1602         return (retval);
1603 }
1604 
1605 int
1606 devfs_reset_perm(uint_t flags)
1607 {
1608         struct dv_node  *dvp;
1609         int             rval;
1610 
1611         if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL)
1612                 return (0);
1613 
1614         VN_HOLD(DVTOV(dvp));
1615         rval = dv_reset_perm_dir(dvp, flags);
1616         VN_RELE(DVTOV(dvp));
1617         return (rval);
1618 }
1619 
1620 /*
1621  * Clean up dangling devfs shadow nodes for removed
1622  * drivers so that, in the event the driver is re-added
1623  * to the system, newly created nodes won't incorrectly
1624  * pick up these stale shadow node permissions.
1625  *
1626  * This is accomplished by walking down the pathname
1627  * to the directory, starting at the root's attribute
1628  * node, then removing all minors matching the specified
1629  * node name.  Care must be taken to remove all entries
1630  * in a directory before the directory itself, so that
1631  * the clean-up associated with rem_drv'ing a nexus driver
1632  * does not inadvertently result in an inconsistent
1633  * filesystem underlying devfs.
1634  */
1635 
1636 static int
1637 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp)
1638 {
1639         int             error;
1640         vnode_t         *vp;
1641         int             eof;
1642         struct iovec    iov;
1643         struct uio      uio;
1644         struct dirent64 *dp;
1645         dirent64_t      *dbuf;
1646         size_t          dlen;
1647         size_t          dbuflen;
1648         int             ndirents = 64;
1649         char            *nm;
1650 
1651         VN_HOLD(dirvp);
1652 
1653         dlen = ndirents * (sizeof (*dbuf));
1654         dbuf = kmem_alloc(dlen, KM_SLEEP);
1655 
1656         uio.uio_iov = &iov;
1657         uio.uio_iovcnt = 1;
1658         uio.uio_segflg = UIO_SYSSPACE;
1659         uio.uio_fmode = 0;
1660         uio.uio_extflg = UIO_COPY_CACHED;
1661         uio.uio_loffset = 0;
1662         uio.uio_llimit = MAXOFFSET_T;
1663 
1664         eof = 0;
1665         error = 0;
1666         while (!error && !eof) {
1667                 uio.uio_resid = dlen;
1668                 iov.iov_base = (char *)dbuf;
1669                 iov.iov_len = dlen;
1670 
1671                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1672                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1673                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1674 
1675                 dbuflen = dlen - uio.uio_resid;
1676 
1677                 if (error || dbuflen == 0)
1678                         break;
1679 
1680                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1681                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1682 
1683                         nm = dp->d_name;
1684 
1685                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1686                                 continue;
1687 
1688                         error = VOP_LOOKUP(dirvp, nm,
1689                             &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
1690 
1691                         dsysdebug(error,
1692                             ("rem_drv %s/%s lookup (%d)\n",
1693                             dir, nm, error));
1694 
1695                         if (error)
1696                                 continue;
1697 
1698                         ASSERT(vp->v_type == VDIR ||
1699                             vp->v_type == VCHR || vp->v_type == VBLK);
1700 
1701                         if (vp->v_type == VDIR) {
1702                                 error = devfs_remdrv_rmdir(vp, nm, rvp);
1703                                 if (error == 0) {
1704                                         error = VOP_RMDIR(dirvp,
1705                                             (char *)nm, rvp, kcred, NULL, 0);
1706                                         dsysdebug(error,
1707                                             ("rem_drv %s/%s rmdir (%d)\n",
1708                                             dir, nm, error));
1709                                 }
1710                         } else {
1711                                 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1712                                     NULL, 0);
1713                                 dsysdebug(error,
1714                                     ("rem_drv %s/%s remove (%d)\n",
1715                                     dir, nm, error));
1716                         }
1717 
1718                         VN_RELE(vp);
1719                         if (error) {
1720                                 goto exit;
1721                         }
1722                 }
1723         }
1724 
1725 exit:
1726         VN_RELE(dirvp);
1727         kmem_free(dbuf, dlen);
1728 
1729         return (error);
1730 }
1731 
1732 int
1733 devfs_remdrv_cleanup(const char *dir, const char *nodename)
1734 {
1735         int             error;
1736         vnode_t         *vp;
1737         vnode_t         *dirvp;
1738         int             eof;
1739         struct iovec    iov;
1740         struct uio      uio;
1741         struct dirent64 *dp;
1742         dirent64_t      *dbuf;
1743         size_t          dlen;
1744         size_t          dbuflen;
1745         int             ndirents = 64;
1746         int             nodenamelen = strlen(nodename);
1747         char            *nm;
1748         struct pathname pn;
1749         vnode_t         *rvp;   /* root node of the underlying attribute fs */
1750 
1751         dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename));
1752 
1753         if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn))
1754                 return (0);
1755 
1756         rvp = dvroot->dv_attrvp;
1757         ASSERT(rvp != NULL);
1758         VN_HOLD(rvp);
1759 
1760         pn_skipslash(&pn);
1761         dirvp = rvp;
1762         VN_HOLD(dirvp);
1763 
1764         nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1765 
1766         while (pn_pathleft(&pn)) {
1767                 ASSERT(dirvp->v_type == VDIR);
1768                 (void) pn_getcomponent(&pn, nm);
1769                 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0));
1770                 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred,
1771                     NULL, NULL, NULL);
1772                 if (error) {
1773                         dcmn_err5(("remdrv_cleanup %s lookup error %d\n",
1774                             nm, error));
1775                         VN_RELE(dirvp);
1776                         if (dirvp != rvp)
1777                                 VN_RELE(rvp);
1778                         pn_free(&pn);
1779                         kmem_free(nm, MAXNAMELEN);
1780                         return (0);
1781                 }
1782                 VN_RELE(dirvp);
1783                 dirvp = vp;
1784                 pn_skipslash(&pn);
1785         }
1786 
1787         ASSERT(dirvp->v_type == VDIR);
1788         if (dirvp != rvp)
1789                 VN_RELE(rvp);
1790         pn_free(&pn);
1791         kmem_free(nm, MAXNAMELEN);
1792 
1793         dlen = ndirents * (sizeof (*dbuf));
1794         dbuf = kmem_alloc(dlen, KM_SLEEP);
1795 
1796         uio.uio_iov = &iov;
1797         uio.uio_iovcnt = 1;
1798         uio.uio_segflg = UIO_SYSSPACE;
1799         uio.uio_fmode = 0;
1800         uio.uio_extflg = UIO_COPY_CACHED;
1801         uio.uio_loffset = 0;
1802         uio.uio_llimit = MAXOFFSET_T;
1803 
1804         eof = 0;
1805         error = 0;
1806         while (!error && !eof) {
1807                 uio.uio_resid = dlen;
1808                 iov.iov_base = (char *)dbuf;
1809                 iov.iov_len = dlen;
1810 
1811                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1812                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1813                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1814 
1815                 dbuflen = dlen - uio.uio_resid;
1816 
1817                 if (error || dbuflen == 0)
1818                         break;
1819 
1820                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1821                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1822 
1823                         nm = dp->d_name;
1824 
1825                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1826                                 continue;
1827 
1828                         if (strncmp(nm, nodename, nodenamelen) != 0)
1829                                 continue;
1830 
1831                         error = VOP_LOOKUP(dirvp, nm, &vp,
1832                             NULL, 0, NULL, kcred, NULL, NULL, NULL);
1833 
1834                         dsysdebug(error,
1835                             ("rem_drv %s/%s lookup (%d)\n",
1836                             dir, nm, error));
1837 
1838                         if (error)
1839                                 continue;
1840 
1841                         ASSERT(vp->v_type == VDIR ||
1842                             vp->v_type == VCHR || vp->v_type == VBLK);
1843 
1844                         if (vp->v_type == VDIR) {
1845                                 error = devfs_remdrv_rmdir(vp, nm, rvp);
1846                                 if (error == 0) {
1847                                         error = VOP_RMDIR(dirvp, (char *)nm,
1848                                             rvp, kcred, NULL, 0);
1849                                         dsysdebug(error,
1850                                             ("rem_drv %s/%s rmdir (%d)\n",
1851                                             dir, nm, error));
1852                                 }
1853                         } else {
1854                                 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1855                                     NULL, 0);
1856                                 dsysdebug(error,
1857                                     ("rem_drv %s/%s remove (%d)\n",
1858                                     dir, nm, error));
1859                         }
1860 
1861                         VN_RELE(vp);
1862                         if (error)
1863                                 goto exit;
1864                 }
1865         }
1866 
1867 exit:
1868         VN_RELE(dirvp);
1869 
1870         kmem_free(dbuf, dlen);
1871 
1872         return (0);
1873 }
1874 
/*
 * Element of the singly linked list that dv_walk() builds to queue
 * the directory dv_nodes it still has to descend into.
 */
struct dv_list {
        struct dv_node  *dv;    /* queued directory node */
        struct dv_list  *next;  /* next queued element; NULL at the tail */
};
1879 
1880 void
1881 dv_walk(
1882         struct dv_node  *ddv,
1883         char            *devnm,
1884         void            (*callback)(struct dv_node *, void *),
1885         void            *arg)
1886 {
1887         struct vnode    *dvp;
1888         struct dv_node  *dv;
1889         struct dv_list  *head, *tail, *next;
1890         int             len;
1891 
1892         dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n",
1893             ddv->dv_name, devnm ? devnm : "<null>"));
1894 
1895         dvp = DVTOV(ddv);
1896 
1897         ASSERT(dvp->v_type == VDIR);
1898 
1899         head = tail = next = NULL;
1900 
1901         rw_enter(&ddv->dv_contents, RW_READER);
1902         mutex_enter(&dvp->v_lock);
1903         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1904                 /*
1905                  * If devnm is not NULL and is not the empty string,
1906                  * select only dv_nodes with matching non-minor name
1907                  */
1908                 if (devnm && (len = strlen(devnm)) &&
1909                     (strncmp(devnm, dv->dv_name, len) ||
1910                     (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0')))
1911                         continue;
1912 
1913                 callback(dv, arg);
1914 
1915                 if (DVTOV(dv)->v_type != VDIR)
1916                         continue;
1917 
1918                 next = kmem_zalloc(sizeof (*next), KM_SLEEP);
1919                 next->dv = dv;
1920 
1921                 if (tail)
1922                         tail->next = next;
1923                 else
1924                         head = next;
1925 
1926                 tail = next;
1927         }
1928 
1929         while (head) {
1930                 dv_walk(head->dv, NULL, callback, arg);
1931                 next = head->next;
1932                 kmem_free(head, sizeof (*head));
1933                 head = next;
1934         }
1935         rw_exit(&ddv->dv_contents);
1936         mutex_exit(&dvp->v_lock);
1937 }