1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
  24  * Copyright (c) 2017 by Delphix. All rights reserved.
  25  */
  26 
  27 /*
  28  * utility routines for the /dev fs
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/t_lock.h>
  34 #include <sys/systm.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/user.h>
  37 #include <sys/time.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/file.h>
  41 #include <sys/fcntl.h>
  42 #include <sys/flock.h>
  43 #include <sys/kmem.h>
  44 #include <sys/uio.h>
  45 #include <sys/errno.h>
  46 #include <sys/stat.h>
  47 #include <sys/cred.h>
  48 #include <sys/dirent.h>
  49 #include <sys/pathname.h>
  50 #include <sys/cmn_err.h>
  51 #include <sys/debug.h>
  52 #include <sys/mode.h>
  53 #include <sys/policy.h>
  54 #include <fs/fs_subr.h>
  55 #include <sys/mount.h>
  56 #include <sys/fs/snode.h>
  57 #include <sys/fs/dv_node.h>
  58 #include <sys/fs/sdev_impl.h>
  59 #include <sys/sunndi.h>
  60 #include <sys/sunmdi.h>
  61 #include <sys/conf.h>
  62 #include <sys/proc.h>
  63 #include <sys/user.h>
  64 #include <sys/modctl.h>
  65 
  66 #ifdef DEBUG
  67 int sdev_debug = 0x00000001;
  68 int sdev_debug_cache_flags = 0;
  69 #endif
  70 
  71 /*
  72  * globals
  73  */
  74 /* prototype memory vattrs */
  75 vattr_t sdev_vattr_dir = {
  76         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
  77         VDIR,                                   /* va_type */
  78         SDEV_DIRMODE_DEFAULT,                   /* va_mode */
  79         SDEV_UID_DEFAULT,                       /* va_uid */
  80         SDEV_GID_DEFAULT,                       /* va_gid */
  81         0,                                      /* va_fsid */
  82         0,                                      /* va_nodeid */
  83         0,                                      /* va_nlink */
  84         0,                                      /* va_size */
  85         0,                                      /* va_atime */
  86         0,                                      /* va_mtime */
  87         0,                                      /* va_ctime */
  88         0,                                      /* va_rdev */
  89         0,                                      /* va_blksize */
  90         0,                                      /* va_nblocks */
  91         0                                       /* va_vcode */
  92 };
  93 
  94 vattr_t sdev_vattr_lnk = {
  95         AT_TYPE|AT_MODE,                        /* va_mask */
  96         VLNK,                                   /* va_type */
  97         SDEV_LNKMODE_DEFAULT,                   /* va_mode */
  98         SDEV_UID_DEFAULT,                       /* va_uid */
  99         SDEV_GID_DEFAULT,                       /* va_gid */
 100         0,                                      /* va_fsid */
 101         0,                                      /* va_nodeid */
 102         0,                                      /* va_nlink */
 103         0,                                      /* va_size */
 104         0,                                      /* va_atime */
 105         0,                                      /* va_mtime */
 106         0,                                      /* va_ctime */
 107         0,                                      /* va_rdev */
 108         0,                                      /* va_blksize */
 109         0,                                      /* va_nblocks */
 110         0                                       /* va_vcode */
 111 };
 112 
 113 vattr_t sdev_vattr_blk = {
 114         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 115         VBLK,                                   /* va_type */
 116         S_IFBLK | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 117         SDEV_UID_DEFAULT,                       /* va_uid */
 118         SDEV_GID_DEFAULT,                       /* va_gid */
 119         0,                                      /* va_fsid */
 120         0,                                      /* va_nodeid */
 121         0,                                      /* va_nlink */
 122         0,                                      /* va_size */
 123         0,                                      /* va_atime */
 124         0,                                      /* va_mtime */
 125         0,                                      /* va_ctime */
 126         0,                                      /* va_rdev */
 127         0,                                      /* va_blksize */
 128         0,                                      /* va_nblocks */
 129         0                                       /* va_vcode */
 130 };
 131 
 132 vattr_t sdev_vattr_chr = {
 133         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 134         VCHR,                                   /* va_type */
 135         S_IFCHR | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 136         SDEV_UID_DEFAULT,                       /* va_uid */
 137         SDEV_GID_DEFAULT,                       /* va_gid */
 138         0,                                      /* va_fsid */
 139         0,                                      /* va_nodeid */
 140         0,                                      /* va_nlink */
 141         0,                                      /* va_size */
 142         0,                                      /* va_atime */
 143         0,                                      /* va_mtime */
 144         0,                                      /* va_ctime */
 145         0,                                      /* va_rdev */
 146         0,                                      /* va_blksize */
 147         0,                                      /* va_nblocks */
 148         0                                       /* va_vcode */
 149 };
 150 
 151 kmem_cache_t    *sdev_node_cache;       /* sdev_node cache */
 152 int             devtype;                /* fstype */
 153 
 154 /* static */
 155 static struct vnodeops *sdev_get_vop(struct sdev_node *);
 156 static void sdev_set_no_negcache(struct sdev_node *);
 157 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
 158 static void sdev_free_vtab(fs_operation_def_t *);
 159 
 160 static void
 161 sdev_prof_free(struct sdev_node *dv)
 162 {
 163         ASSERT(!SDEV_IS_GLOBAL(dv));
 164         nvlist_free(dv->sdev_prof.dev_name);
 165         nvlist_free(dv->sdev_prof.dev_map);
 166         nvlist_free(dv->sdev_prof.dev_symlink);
 167         nvlist_free(dv->sdev_prof.dev_glob_incdir);
 168         nvlist_free(dv->sdev_prof.dev_glob_excdir);
 169         bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 170 }
 171 
 172 /* sdev_node cache constructor */
 173 /*ARGSUSED1*/
 174 static int
 175 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
 176 {
 177         struct sdev_node *dv = (struct sdev_node *)buf;
 178         struct vnode *vp;
 179 
 180         bzero(buf, sizeof (struct sdev_node));
 181         vp = dv->sdev_vnode = vn_alloc(flag);
 182         if (vp == NULL) {
 183                 return (-1);
 184         }
 185         vp->v_data = dv;
 186         rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
 187         return (0);
 188 }
 189 
 190 /* sdev_node cache destructor */
 191 /*ARGSUSED1*/
 192 static void
 193 i_sdev_node_dtor(void *buf, void *arg)
 194 {
 195         struct sdev_node *dv = (struct sdev_node *)buf;
 196         struct vnode *vp = SDEVTOV(dv);
 197 
 198         rw_destroy(&dv->sdev_contents);
 199         vn_free(vp);
 200 }
 201 
 202 /* initialize sdev_node cache */
 203 void
 204 sdev_node_cache_init()
 205 {
 206         int flags = 0;
 207 
 208 #ifdef  DEBUG
 209         flags = sdev_debug_cache_flags;
 210         if (flags)
 211                 sdcmn_err(("cache debug flags 0x%x\n", flags));
 212 #endif  /* DEBUG */
 213 
 214         ASSERT(sdev_node_cache == NULL);
 215         sdev_node_cache = kmem_cache_create("sdev_node_cache",
 216             sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
 217             NULL, NULL, NULL, flags);
 218 }
 219 
 220 /* destroy sdev_node cache */
 221 void
 222 sdev_node_cache_fini()
 223 {
 224         ASSERT(sdev_node_cache != NULL);
 225         kmem_cache_destroy(sdev_node_cache);
 226         sdev_node_cache = NULL;
 227 }
 228 
 229 /*
 230  * Compare two nodes lexographically to balance avl tree
 231  */
 232 static int
 233 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
 234 {
 235         int rv;
 236         if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
 237                 return (0);
 238         return ((rv < 0) ? -1 : 1);
 239 }
 240 
 241 void
 242 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
 243 {
 244         ASSERT(dv);
 245         ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
 246         dv->sdev_state = state;
 247 }
 248 
 249 static void
 250 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
 251 {
 252         timestruc_t     now;
 253         struct vattr    *attrp;
 254         uint_t          mask;
 255 
 256         ASSERT(dv->sdev_attr);
 257         ASSERT(vap);
 258 
 259         attrp = dv->sdev_attr;
 260         mask = vap->va_mask;
 261         if (mask & AT_TYPE)
 262                 attrp->va_type = vap->va_type;
 263         if (mask & AT_MODE)
 264                 attrp->va_mode = vap->va_mode;
 265         if (mask & AT_UID)
 266                 attrp->va_uid = vap->va_uid;
 267         if (mask & AT_GID)
 268                 attrp->va_gid = vap->va_gid;
 269         if (mask & AT_RDEV)
 270                 attrp->va_rdev = vap->va_rdev;
 271 
 272         gethrestime(&now);
 273         attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
 274         attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
 275         attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
 276 }
 277 
 278 static void
 279 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
 280 {
 281         ASSERT(dv->sdev_attr == NULL);
 282         ASSERT(vap->va_mask & AT_TYPE);
 283         ASSERT(vap->va_mask & AT_MODE);
 284 
 285         dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
 286         sdev_attr_update(dv, vap);
 287 }
 288 
 289 /* alloc and initialize a sdev_node */
 290 int
 291 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 292     vattr_t *vap)
 293 {
 294         struct sdev_node *dv = NULL;
 295         struct vnode *vp;
 296         size_t nmlen, len;
 297         devname_handle_t  *dhl;
 298 
 299         nmlen = strlen(nm) + 1;
 300         if (nmlen > MAXNAMELEN) {
 301                 sdcmn_err9(("sdev_nodeinit: node name %s"
 302                     " too long\n", nm));
 303                 *newdv = NULL;
 304                 return (ENAMETOOLONG);
 305         }
 306 
 307         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 308 
 309         dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
 310         bcopy(nm, dv->sdev_name, nmlen);
 311         dv->sdev_namelen = nmlen - 1;        /* '\0' not included */
 312         len = strlen(ddv->sdev_path) + strlen(nm) + 2;
 313         dv->sdev_path = kmem_alloc(len, KM_SLEEP);
 314         (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
 315         /* overwritten for VLNK nodes */
 316         dv->sdev_symlink = NULL;
 317 
 318         vp = SDEVTOV(dv);
 319         vn_reinit(vp);
 320         vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
 321         if (vap)
 322                 vp->v_type = vap->va_type;
 323 
 324         /*
 325          * initialized to the parent's vnodeops.
 326          * maybe overwriten for a VDIR
 327          */
 328         vn_setops(vp, vn_getops(SDEVTOV(ddv)));
 329         vn_exists(vp);
 330 
 331         dv->sdev_dotdot = NULL;
 332         dv->sdev_attrvp = NULL;
 333         if (vap) {
 334                 sdev_attr_alloc(dv, vap);
 335         } else {
 336                 dv->sdev_attr = NULL;
 337         }
 338 
 339         dv->sdev_ino = sdev_mkino(dv);
 340         dv->sdev_nlink = 0;          /* updated on insert */
 341         dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
 342         dv->sdev_flags |= SDEV_BUILD;
 343         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 344         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 345         if (SDEV_IS_GLOBAL(ddv)) {
 346                 dv->sdev_flags |= SDEV_GLOBAL;
 347                 dhl = &(dv->sdev_handle);
 348                 dhl->dh_data = dv;
 349                 dhl->dh_args = NULL;
 350                 sdev_set_no_negcache(dv);
 351                 dv->sdev_gdir_gen = 0;
 352         } else {
 353                 dv->sdev_flags &= ~SDEV_GLOBAL;
 354                 dv->sdev_origin = NULL; /* set later */
 355                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 356                 dv->sdev_ldir_gen = 0;
 357                 dv->sdev_devtree_gen = 0;
 358         }
 359 
 360         rw_enter(&dv->sdev_contents, RW_WRITER);
 361         sdev_set_nodestate(dv, SDEV_INIT);
 362         rw_exit(&dv->sdev_contents);
 363         *newdv = dv;
 364 
 365         return (0);
 366 }
 367 
 368 /*
 369  * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
 370  * caller to transition the node to the SDEV_ZOMBIE state.
 371  */
 372 int
 373 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
 374     void *args, struct cred *cred)
 375 {
 376         int error = 0;
 377         struct vnode *vp = SDEVTOV(dv);
 378         vtype_t type;
 379 
 380         ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
 381 
 382         type = vap->va_type;
 383         vp->v_type = type;
 384         vp->v_rdev = vap->va_rdev;
 385         rw_enter(&dv->sdev_contents, RW_WRITER);
 386         if (type == VDIR) {
 387                 dv->sdev_nlink = 2;
 388                 dv->sdev_flags &= ~SDEV_PERSIST;
 389                 dv->sdev_flags &= ~SDEV_DYNAMIC;
 390                 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
 391                 ASSERT(dv->sdev_dotdot);
 392                 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
 393                 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
 394                 avl_create(&dv->sdev_entries,
 395                     (int (*)(const void *, const void *))sdev_compare_nodes,
 396                     sizeof (struct sdev_node),
 397                     offsetof(struct sdev_node, sdev_avllink));
 398         } else if (type == VLNK) {
 399                 ASSERT(args);
 400                 dv->sdev_nlink = 1;
 401                 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
 402         } else {
 403                 dv->sdev_nlink = 1;
 404         }
 405 
 406         if (!(SDEV_IS_GLOBAL(dv))) {
 407                 dv->sdev_origin = (struct sdev_node *)args;
 408                 dv->sdev_flags &= ~SDEV_PERSIST;
 409         }
 410 
 411         /*
 412          * shadow node is created here OR
 413          * if failed (indicated by dv->sdev_attrvp == NULL),
 414          * created later in sdev_setattr
 415          */
 416         if (avp) {
 417                 dv->sdev_attrvp = avp;
 418         } else {
 419                 if (dv->sdev_attr == NULL) {
 420                         sdev_attr_alloc(dv, vap);
 421                 } else {
 422                         sdev_attr_update(dv, vap);
 423                 }
 424 
 425                 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
 426                         error = sdev_shadow_node(dv, cred);
 427         }
 428 
 429         if (error == 0) {
 430                 /* transition to READY state */
 431                 sdev_set_nodestate(dv, SDEV_READY);
 432                 sdev_nc_node_exists(dv);
 433         }
 434         rw_exit(&dv->sdev_contents);
 435         return (error);
 436 }
 437 
 438 /*
 439  * Build the VROOT sdev_node.
 440  */
 441 /*ARGSUSED*/
 442 struct sdev_node *
 443 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
 444     struct vnode *avp, struct cred *cred)
 445 {
 446         struct sdev_node *dv;
 447         struct vnode *vp;
 448         char devdir[] = "/dev";
 449 
 450         ASSERT(sdev_node_cache != NULL);
 451         ASSERT(avp);
 452         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 453         vp = SDEVTOV(dv);
 454         vn_reinit(vp);
 455         vp->v_flag |= VROOT;
 456         vp->v_vfsp = vfsp;
 457         vp->v_type = VDIR;
 458         vp->v_rdev = devdev;
 459         vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
 460         vn_exists(vp);
 461 
 462         if (vfsp->vfs_mntpt)
 463                 dv->sdev_name = i_ddi_strdup(
 464                     (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
 465         else
 466                 /* vfs_mountdev1 set mount point later */
 467                 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
 468         dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
 469         dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
 470         dv->sdev_ino = SDEV_ROOTINO;
 471         dv->sdev_nlink = 2;          /* name + . (no sdev_insert) */
 472         dv->sdev_dotdot = dv;                /* .. == self */
 473         dv->sdev_attrvp = avp;
 474         dv->sdev_attr = NULL;
 475         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 476         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 477         if (strcmp(dv->sdev_name, "/dev") == 0) {
 478                 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
 479                 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
 480                 dv->sdev_gdir_gen = 0;
 481         } else {
 482                 dv->sdev_flags = SDEV_BUILD;
 483                 dv->sdev_flags &= ~SDEV_PERSIST;
 484                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 485                 dv->sdev_ldir_gen = 0;
 486                 dv->sdev_devtree_gen = 0;
 487         }
 488 
 489         avl_create(&dv->sdev_entries,
 490             (int (*)(const void *, const void *))sdev_compare_nodes,
 491             sizeof (struct sdev_node),
 492             offsetof(struct sdev_node, sdev_avllink));
 493 
 494         rw_enter(&dv->sdev_contents, RW_WRITER);
 495         sdev_set_nodestate(dv, SDEV_READY);
 496         rw_exit(&dv->sdev_contents);
 497         sdev_nc_node_exists(dv);
 498         return (dv);
 499 }
 500 
 501 /* directory dependent vop table */
 502 struct sdev_vop_table {
 503         char *vt_name;                          /* subdirectory name */
 504         const fs_operation_def_t *vt_service;   /* vnodeops table */
 505         struct vnodeops *vt_vops;               /* constructed vop */
 506         struct vnodeops **vt_global_vops;       /* global container for vop */
 507         int (*vt_vtor)(struct sdev_node *);     /* validate sdev_node */
 508         int vt_flags;
 509 };
 510 
 511 /*
 512  * A nice improvement would be to provide a plug-in mechanism
 513  * for this table instead of a const table.
 514  */
 515 static struct sdev_vop_table vtab[] =
 516 {
 517         { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
 518         SDEV_DYNAMIC | SDEV_VTOR },
 519 
 520         { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
 521         SDEV_DYNAMIC | SDEV_VTOR },
 522 
 523         { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
 524         devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
 525 
 526         { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
 527 
 528         { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
 529         SDEV_DYNAMIC | SDEV_VTOR },
 530 
 531         { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
 532         devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
 533 
 534         /*
 535          * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
 536          * lofi driver controls child nodes.
 537          *
 538          * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
 539          * stale nodes (e.g. from devfsadm -R).
 540          *
 541          * In addition, devfsadm knows not to attempt a rmdir: a zone
 542          * may hold a reference, which would zombify the node,
 543          * preventing a mkdir.
 544          */
 545 
 546         { "lofi", NULL, NULL, NULL, NULL,
 547             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 548         { "rlofi", NULL, NULL, NULL, NULL,
 549             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 550 
 551         { NULL, NULL, NULL, NULL, NULL, 0}
 552 };
 553 
 554 /*
 555  * We need to match off of the sdev_path, not the sdev_name. We are only allowed
 556  * to exist directly under /dev.
 557  */
 558 struct sdev_vop_table *
 559 sdev_match(struct sdev_node *dv)
 560 {
 561         int vlen;
 562         int i;
 563         const char *path;
 564 
 565         if (strlen(dv->sdev_path) <= 5)
 566                 return (NULL);
 567 
 568         if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
 569                 return (NULL);
 570         path = dv->sdev_path + 5;
 571 
 572         for (i = 0; vtab[i].vt_name; i++) {
 573                 if (strcmp(vtab[i].vt_name, path) == 0)
 574                         return (&vtab[i]);
 575                 if (vtab[i].vt_flags & SDEV_SUBDIR) {
 576                         vlen = strlen(vtab[i].vt_name);
 577                         if ((strncmp(vtab[i].vt_name, path,
 578                             vlen - 1) == 0) && path[vlen] == '/')
 579                                 return (&vtab[i]);
 580                 }
 581 
 582         }
 583         return (NULL);
 584 }
 585 
 586 /*
 587  *  sets a directory's vnodeops if the directory is in the vtab;
 588  */
 589 static struct vnodeops *
 590 sdev_get_vop(struct sdev_node *dv)
 591 {
 592         struct sdev_vop_table *vtp;
 593         char *path;
 594 
 595         path = dv->sdev_path;
 596         ASSERT(path);
 597 
 598         /* gets the relative path to /dev/ */
 599         path += 5;
 600 
 601         /* gets the vtab entry it matches */
 602         if ((vtp = sdev_match(dv)) != NULL) {
 603                 dv->sdev_flags |= vtp->vt_flags;
 604                 if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
 605                     (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
 606                         dv->sdev_flags |= SDEV_PERSIST;
 607 
 608                 if (vtp->vt_vops) {
 609                         if (vtp->vt_global_vops)
 610                                 *(vtp->vt_global_vops) = vtp->vt_vops;
 611 
 612                         return (vtp->vt_vops);
 613                 }
 614 
 615                 if (vtp->vt_service) {
 616                         fs_operation_def_t *templ;
 617                         templ = sdev_merge_vtab(vtp->vt_service);
 618                         if (vn_make_ops(vtp->vt_name,
 619                             (const fs_operation_def_t *)templ,
 620                             &vtp->vt_vops) != 0) {
 621                                 cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
 622                                     vtp->vt_name);
 623                                 /*NOTREACHED*/
 624                         }
 625                         if (vtp->vt_global_vops) {
 626                                 *(vtp->vt_global_vops) = vtp->vt_vops;
 627                         }
 628                         sdev_free_vtab(templ);
 629 
 630                         return (vtp->vt_vops);
 631                 }
 632 
 633                 return (sdev_vnodeops);
 634         }
 635 
 636         /* child inherits the persistence of the parent */
 637         if (SDEV_IS_PERSIST(dv->sdev_dotdot))
 638                 dv->sdev_flags |= SDEV_PERSIST;
 639 
 640         return (sdev_vnodeops);
 641 }
 642 
 643 static void
 644 sdev_set_no_negcache(struct sdev_node *dv)
 645 {
 646         int i;
 647         char *path;
 648 
 649         ASSERT(dv->sdev_path);
 650         path = dv->sdev_path + strlen("/dev/");
 651 
 652         for (i = 0; vtab[i].vt_name; i++) {
 653                 if (strcmp(vtab[i].vt_name, path) == 0) {
 654                         if (vtab[i].vt_flags & SDEV_NO_NCACHE)
 655                                 dv->sdev_flags |= SDEV_NO_NCACHE;
 656                         break;
 657                 }
 658         }
 659 }
 660 
 661 void *
 662 sdev_get_vtor(struct sdev_node *dv)
 663 {
 664         struct sdev_vop_table *vtp;
 665 
 666         vtp = sdev_match(dv);
 667         if (vtp)
 668                 return ((void *)vtp->vt_vtor);
 669         else
 670                 return (NULL);
 671 }
 672 
 673 /*
 674  * Build the base root inode
 675  */
 676 ino_t
 677 sdev_mkino(struct sdev_node *dv)
 678 {
 679         ino_t   ino;
 680 
 681         /*
 682          * for now, follow the lead of tmpfs here
 683          * need to someday understand the requirements here
 684          */
 685         ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
 686         ino += SDEV_ROOTINO + 1;
 687 
 688         return (ino);
 689 }
 690 
 691 int
 692 sdev_getlink(struct vnode *linkvp, char **link)
 693 {
 694         int err;
 695         char *buf;
 696         struct uio uio = {0};
 697         struct iovec iov = {0};
 698 
 699         if (linkvp == NULL)
 700                 return (ENOENT);
 701         ASSERT(linkvp->v_type == VLNK);
 702 
 703         buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 704         iov.iov_base = buf;
 705         iov.iov_len = MAXPATHLEN;
 706         uio.uio_iov = &iov;
 707         uio.uio_iovcnt = 1;
 708         uio.uio_resid = MAXPATHLEN;
 709         uio.uio_segflg = UIO_SYSSPACE;
 710         uio.uio_llimit = MAXOFFSET_T;
 711 
 712         err = VOP_READLINK(linkvp, &uio, kcred, NULL);
 713         if (err) {
 714                 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
 715                 kmem_free(buf, MAXPATHLEN);
 716                 return (ENOENT);
 717         }
 718 
 719         /* mission complete */
 720         *link = i_ddi_strdup(buf, KM_SLEEP);
 721         kmem_free(buf, MAXPATHLEN);
 722         return (0);
 723 }
 724 
 725 /*
 726  * A convenient wrapper to get the devfs node vnode for a device
 727  * minor functionality: readlink() of a /dev symlink
 728  * Place the link into dv->sdev_symlink
 729  */
 730 static int
 731 sdev_follow_link(struct sdev_node *dv)
 732 {
 733         int err;
 734         struct vnode *linkvp;
 735         char *link = NULL;
 736 
 737         linkvp = SDEVTOV(dv);
 738         if (linkvp == NULL)
 739                 return (ENOENT);
 740         ASSERT(linkvp->v_type == VLNK);
 741         err = sdev_getlink(linkvp, &link);
 742         if (err) {
 743                 dv->sdev_symlink = NULL;
 744                 return (ENOENT);
 745         }
 746 
 747         ASSERT(link != NULL);
 748         dv->sdev_symlink = link;
 749         return (0);
 750 }
 751 
 752 static int
 753 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
 754 {
 755         vtype_t otype = SDEVTOV(dv)->v_type;
 756 
 757         /*
 758          * existing sdev_node has a different type.
 759          */
 760         if (otype != nvap->va_type) {
 761                 sdcmn_err9(("sdev_node_check: existing node "
 762                     "  %s type %d does not match new node type %d\n",
 763                     dv->sdev_name, otype, nvap->va_type));
 764                 return (EEXIST);
 765         }
 766 
 767         /*
 768          * For a symlink, the target should be the same.
 769          */
 770         if (otype == VLNK) {
 771                 ASSERT(nargs != NULL);
 772                 ASSERT(dv->sdev_symlink != NULL);
 773                 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
 774                         sdcmn_err9(("sdev_node_check: existing node "
 775                             " %s has different symlink %s as new node "
 776                             " %s\n", dv->sdev_name, dv->sdev_symlink,
 777                             (char *)nargs));
 778                         return (EEXIST);
 779                 }
 780         }
 781 
 782         return (0);
 783 }
 784 
 785 /*
 786  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 787  *
 788  * arguments:
 789  *      - ddv (parent)
 790  *      - nm (child name)
 791  *      - newdv (sdev_node for nm is returned here)
 792  *      - vap (vattr for the node to be created, va_type should be set.
 793  *      - avp (attribute vnode)
 794  *        the defaults should be used if unknown)
 795  *      - cred
 796  *      - args
 797  *          . tnm (for VLNK)
 798  *          . global sdev_node (for !SDEV_GLOBAL)
 799  *      - state: SDEV_INIT, SDEV_READY
 800  *
 801  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
 802  *
 803  * NOTE:  directory contents writers lock needs to be held before
 804  *        calling this routine.
 805  */
 806 int
 807 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 808     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
 809     sdev_node_state_t state)
 810 {
 811         int error = 0;
 812         sdev_node_state_t node_state;
 813         struct sdev_node *dv = NULL;
 814 
 815         ASSERT(state != SDEV_ZOMBIE);
 816         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
 817 
 818         if (*newdv) {
 819                 dv = *newdv;
 820         } else {
 821                 /* allocate and initialize a sdev_node */
 822                 if (ddv->sdev_state == SDEV_ZOMBIE) {
 823                         sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
 824                             ddv->sdev_path));
 825                         return (ENOENT);
 826                 }
 827 
 828                 error = sdev_nodeinit(ddv, nm, &dv, vap);
 829                 if (error != 0) {
 830                         sdcmn_err9(("sdev_mknode: error %d,"
 831                             " name %s can not be initialized\n",
 832                             error, nm));
 833                         return (error);
 834                 }
 835                 ASSERT(dv);
 836 
 837                 /* insert into the directory cache */
 838                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
 839         }
 840 
 841         ASSERT(dv);
 842         node_state = dv->sdev_state;
 843         ASSERT(node_state != SDEV_ZOMBIE);
 844 
 845         if (state == SDEV_READY) {
 846                 switch (node_state) {
 847                 case SDEV_INIT:
 848                         error = sdev_nodeready(dv, vap, avp, args, cred);
 849                         if (error) {
 850                                 sdcmn_err9(("sdev_mknode: node %s can NOT"
 851                                     " be transitioned into READY state, "
 852                                     "error %d\n", nm, error));
 853                         }
 854                         break;
 855                 case SDEV_READY:
 856                         /*
 857                          * Do some sanity checking to make sure
 858                          * the existing sdev_node is what has been
 859                          * asked for.
 860                          */
 861                         error = sdev_node_check(dv, vap, args);
 862                         break;
 863                 default:
 864                         break;
 865                 }
 866         }
 867 
 868         if (!error) {
 869                 *newdv = dv;
 870                 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
 871         } else {
 872                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
 873                 /*
 874                  * We created this node, it wasn't passed into us. Therefore it
 875                  * is up to us to delete it.
 876                  */
 877                 if (*newdv == NULL)
 878                         SDEV_SIMPLE_RELE(dv);
 879                 *newdv = NULL;
 880         }
 881 
 882         return (error);
 883 }
 884 
 885 /*
 886  * convenient wrapper to change vp's ATIME, CTIME and MTIME
 887  */
 888 void
 889 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
 890 {
 891         struct vattr attr;
 892         timestruc_t now;
 893         int err;
 894 
 895         ASSERT(vp);
 896         gethrestime(&now);
 897         if (mask & AT_CTIME)
 898                 attr.va_ctime = now;
 899         if (mask & AT_MTIME)
 900                 attr.va_mtime = now;
 901         if (mask & AT_ATIME)
 902                 attr.va_atime = now;
 903 
 904         attr.va_mask = (mask & AT_TIMES);
 905         err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
 906         if (err && (err != EROFS)) {
 907                 sdcmn_err(("update timestamps error %d\n", err));
 908         }
 909 }
 910 
 911 /*
 912  * the backing store vnode is released here
 913  */
 914 /*ARGSUSED1*/
 915 void
 916 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
 917 {
 918         /* no references */
 919         ASSERT(dv->sdev_nlink == 0);
 920 
 921         if (dv->sdev_attrvp != NULLVP) {
 922                 VN_RELE(dv->sdev_attrvp);
 923                 /*
 924                  * reset the attrvp so that no more
 925                  * references can be made on this already
 926                  * vn_rele() vnode
 927                  */
 928                 dv->sdev_attrvp = NULLVP;
 929         }
 930 
 931         if (dv->sdev_attr != NULL) {
 932                 kmem_free(dv->sdev_attr, sizeof (struct vattr));
 933                 dv->sdev_attr = NULL;
 934         }
 935 
 936         if (dv->sdev_name != NULL) {
 937                 kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
 938                 dv->sdev_name = NULL;
 939         }
 940 
 941         if (dv->sdev_symlink != NULL) {
 942                 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
 943                 dv->sdev_symlink = NULL;
 944         }
 945 
 946         if (dv->sdev_path) {
 947                 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
 948                 dv->sdev_path = NULL;
 949         }
 950 
 951         if (!SDEV_IS_GLOBAL(dv))
 952                 sdev_prof_free(dv);
 953 
 954         if (SDEVTOV(dv)->v_type == VDIR) {
 955                 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
 956                 avl_destroy(&dv->sdev_entries);
 957         }
 958 
 959         mutex_destroy(&dv->sdev_lookup_lock);
 960         cv_destroy(&dv->sdev_lookup_cv);
 961 
 962         /* return node to initial state as per constructor */
 963         (void) memset((void *)&dv->sdev_instance_data, 0,
 964             sizeof (dv->sdev_instance_data));
 965         vn_invalid(SDEVTOV(dv));
 966         kmem_cache_free(sdev_node_cache, dv);
 967 }
 968 
 969 /*
 970  * DIRECTORY CACHE lookup
 971  */
 972 struct sdev_node *
 973 sdev_findbyname(struct sdev_node *ddv, char *nm)
 974 {
 975         struct sdev_node *dv;
 976         struct sdev_node dvtmp;
 977         avl_index_t     where;
 978 
 979         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
 980 
 981         dvtmp.sdev_name = nm;
 982         dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
 983         if (dv) {
 984                 ASSERT(dv->sdev_dotdot == ddv);
 985                 ASSERT(strcmp(dv->sdev_name, nm) == 0);
 986                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
 987                 SDEV_HOLD(dv);
 988                 return (dv);
 989         }
 990         return (NULL);
 991 }
 992 
 993 /*
 994  * Inserts a new sdev_node in a parent directory
 995  */
 996 void
 997 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
 998 {
 999         avl_index_t where;
1000 
1001         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1002         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1003         ASSERT(ddv->sdev_nlink >= 2);
1004         ASSERT(dv->sdev_nlink == 0);
1005         ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1006 
1007         dv->sdev_dotdot = ddv;
1008         VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1009         avl_insert(&ddv->sdev_entries, dv, where);
1010         ddv->sdev_nlink++;
1011 }
1012 
1013 /*
1014  * The following check is needed because while sdev_nodes are linked
1015  * in SDEV_INIT state, they have their link counts incremented only
1016  * in SDEV_READY state.
1017  */
1018 static void
1019 decr_link(struct sdev_node *dv)
1020 {
1021         VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
1022         if (dv->sdev_state != SDEV_INIT) {
1023                 VERIFY(dv->sdev_nlink >= 1);
1024                 dv->sdev_nlink--;
1025         } else {
1026                 VERIFY(dv->sdev_nlink == 0);
1027         }
1028 }
1029 
1030 /*
1031  * Delete an existing dv from directory cache
1032  *
1033  * In the case of a node is still held by non-zero reference count, the node is
1034  * put into ZOMBIE state. The node is always unlinked from its parent, but it is
1035  * not destroyed via sdev_inactive until its reference count reaches "0".
1036  */
1037 static void
1038 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1039 {
1040         struct vnode *vp;
1041         sdev_node_state_t os;
1042 
1043         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1044 
1045         vp = SDEVTOV(dv);
1046         mutex_enter(&vp->v_lock);
1047         rw_enter(&dv->sdev_contents, RW_WRITER);
1048         os = dv->sdev_state;
1049         ASSERT(os != SDEV_ZOMBIE);
1050         dv->sdev_state = SDEV_ZOMBIE;
1051 
1052         /*
1053          * unlink ourselves from the parent directory now to take care of the ..
1054          * link. However, if we're a directory, we don't remove our reference to
1055          * ourself eg. '.' until we are torn down in the inactive callback.
1056          */
1057         decr_link(ddv);
1058         avl_remove(&ddv->sdev_entries, dv);
1059         /*
1060          * sdev_inactive expects nodes to have a link to themselves when we're
1061          * tearing them down. If we're transitioning from the initial state to
1062          * zombie and not via ready, then we're not going to have this link that
1063          * comes from the node being ready. As a result, we need to increment
1064          * our link count by one to account for this.
1065          */
1066         if (os == SDEV_INIT && dv->sdev_nlink == 0)
1067                 dv->sdev_nlink++;
1068         rw_exit(&dv->sdev_contents);
1069         mutex_exit(&vp->v_lock);
1070 }
1071 
1072 /*
1073  * check if the source is in the path of the target
1074  *
1075  * source and target are different
1076  */
1077 /*ARGSUSED2*/
1078 static int
1079 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1080 {
1081         int error = 0;
1082         struct sdev_node *dotdot, *dir;
1083 
1084         dotdot = tdv->sdev_dotdot;
1085         ASSERT(dotdot);
1086 
1087         /* fs root */
1088         if (dotdot == tdv) {
1089                 return (0);
1090         }
1091 
1092         for (;;) {
1093                 /*
1094                  * avoid error cases like
1095                  *      mv a a/b
1096                  *      mv a a/b/c
1097                  *      etc.
1098                  */
1099                 if (dotdot == sdv) {
1100                         error = EINVAL;
1101                         break;
1102                 }
1103 
1104                 dir = dotdot;
1105                 dotdot = dir->sdev_dotdot;
1106 
1107                 /* done checking because root is reached */
1108                 if (dir == dotdot) {
1109                         break;
1110                 }
1111         }
1112         return (error);
1113 }
1114 
1115 int
1116 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1117     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1118     struct cred *cred)
1119 {
1120         int error = 0;
1121         struct vnode *ovp = SDEVTOV(odv);
1122         struct vnode *nvp;
1123         struct vattr vattr;
1124         int doingdir = (ovp->v_type == VDIR);
1125         char *link = NULL;
1126         int samedir = (oddv == nddv) ? 1 : 0;
1127         int bkstore = 0;
1128         struct sdev_node *idv = NULL;
1129         struct sdev_node *ndv = NULL;
1130         timestruc_t now;
1131 
1132         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1133         error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1134         if (error)
1135                 return (error);
1136 
1137         if (!samedir)
1138                 rw_enter(&oddv->sdev_contents, RW_WRITER);
1139         rw_enter(&nddv->sdev_contents, RW_WRITER);
1140 
1141         /*
1142          * the source may have been deleted by another thread before
1143          * we gets here.
1144          */
1145         if (odv->sdev_state != SDEV_READY) {
1146                 error = ENOENT;
1147                 goto err_out;
1148         }
1149 
1150         if (doingdir && (odv == nddv)) {
1151                 error = EINVAL;
1152                 goto err_out;
1153         }
1154 
1155         /*
1156          * If renaming a directory, and the parents are different (".." must be
1157          * changed) then the source dir must not be in the dir hierarchy above
1158          * the target since it would orphan everything below the source dir.
1159          */
1160         if (doingdir && (oddv != nddv)) {
1161                 error = sdev_checkpath(odv, nddv, cred);
1162                 if (error)
1163                         goto err_out;
1164         }
1165 
1166         /* fix the source for a symlink */
1167         if (vattr.va_type == VLNK) {
1168                 if (odv->sdev_symlink == NULL) {
1169                         error = sdev_follow_link(odv);
1170                         if (error) {
1171                                 /*
1172                                  * The underlying symlink doesn't exist. This
1173                                  * node probably shouldn't even exist. While
1174                                  * it's a bit jarring to consumers, we're going
1175                                  * to remove the node from /dev.
1176                                  */
1177                                 if (SDEV_IS_PERSIST((*ndvp)))
1178                                         bkstore = 1;
1179                                 sdev_dirdelete(oddv, odv);
1180                                 if (bkstore) {
1181                                         ASSERT(nddv->sdev_attrvp);
1182                                         error = VOP_REMOVE(nddv->sdev_attrvp,
1183                                             nnm, cred, NULL, 0);
1184                                         if (error)
1185                                                 goto err_out;
1186                                 }
1187                                 error = ENOENT;
1188                                 goto err_out;
1189                         }
1190                 }
1191                 ASSERT(odv->sdev_symlink);
1192                 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1193         }
1194 
1195         /* destination existing */
1196         if (*ndvp) {
1197                 nvp = SDEVTOV(*ndvp);
1198                 ASSERT(nvp);
1199 
1200                 /* handling renaming to itself */
1201                 if (odv == *ndvp) {
1202                         error = 0;
1203                         goto err_out;
1204                 }
1205 
1206                 if (nvp->v_type == VDIR) {
1207                         if (!doingdir) {
1208                                 error = EISDIR;
1209                                 goto err_out;
1210                         }
1211 
1212                         if (vn_vfswlock(nvp)) {
1213                                 error = EBUSY;
1214                                 goto err_out;
1215                         }
1216 
1217                         if (vn_mountedvfs(nvp) != NULL) {
1218                                 vn_vfsunlock(nvp);
1219                                 error = EBUSY;
1220                                 goto err_out;
1221                         }
1222 
1223                         /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1224                         if ((*ndvp)->sdev_nlink > 2) {
1225                                 vn_vfsunlock(nvp);
1226                                 error = EEXIST;
1227                                 goto err_out;
1228                         }
1229                         vn_vfsunlock(nvp);
1230 
1231                         /*
1232                          * We did not place the hold on *ndvp, so even though
1233                          * we're deleting the node, we should not get rid of our
1234                          * reference.
1235                          */
1236                         sdev_dirdelete(nddv, *ndvp);
1237                         *ndvp = NULL;
1238                         ASSERT(nddv->sdev_attrvp);
1239                         error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1240                             nddv->sdev_attrvp, cred, NULL, 0);
1241                         if (error)
1242                                 goto err_out;
1243                 } else {
1244                         if (doingdir) {
1245                                 error = ENOTDIR;
1246                                 goto err_out;
1247                         }
1248 
1249                         if (SDEV_IS_PERSIST((*ndvp))) {
1250                                 bkstore = 1;
1251                         }
1252 
1253                         /*
1254                          * Get rid of the node from the directory cache note.
1255                          * Don't forget that it's not up to us to remove the vn
1256                          * ref on the sdev node, as we did not place it.
1257                          */
1258                         sdev_dirdelete(nddv, *ndvp);
1259                         *ndvp = NULL;
1260                         if (bkstore) {
1261                                 ASSERT(nddv->sdev_attrvp);
1262                                 error = VOP_REMOVE(nddv->sdev_attrvp,
1263                                     nnm, cred, NULL, 0);
1264                                 if (error)
1265                                         goto err_out;
1266                         }
1267                 }
1268         }
1269 
1270         /*
1271          * make a fresh node from the source attrs
1272          */
1273         ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1274         error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1275             NULL, (void *)link, cred, SDEV_READY);
1276 
1277         if (link != NULL) {
1278                 kmem_free(link, strlen(link) + 1);
1279                 link = NULL;
1280         }
1281 
1282         if (error)
1283                 goto err_out;
1284         ASSERT(*ndvp);
1285         ASSERT((*ndvp)->sdev_state == SDEV_READY);
1286 
1287         /* move dir contents */
1288         if (doingdir) {
1289                 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1290                     idv = SDEV_NEXT_ENTRY(odv, idv)) {
1291                         SDEV_HOLD(idv);
1292                         error = sdev_rnmnode(odv, idv,
1293                             (struct sdev_node *)(*ndvp), &ndv,
1294                             idv->sdev_name, cred);
1295                         SDEV_RELE(idv);
1296                         if (error)
1297                                 goto err_out;
1298                         ndv = NULL;
1299                 }
1300         }
1301 
1302         if ((*ndvp)->sdev_attrvp) {
1303                 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1304                     AT_CTIME|AT_ATIME);
1305         } else {
1306                 ASSERT((*ndvp)->sdev_attr);
1307                 gethrestime(&now);
1308                 (*ndvp)->sdev_attr->va_ctime = now;
1309                 (*ndvp)->sdev_attr->va_atime = now;
1310         }
1311 
1312         if (nddv->sdev_attrvp) {
1313                 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1314                     AT_MTIME|AT_ATIME);
1315         } else {
1316                 ASSERT(nddv->sdev_attr);
1317                 gethrestime(&now);
1318                 nddv->sdev_attr->va_mtime = now;
1319                 nddv->sdev_attr->va_atime = now;
1320         }
1321         rw_exit(&nddv->sdev_contents);
1322         if (!samedir)
1323                 rw_exit(&oddv->sdev_contents);
1324 
1325         SDEV_RELE(*ndvp);
1326         return (error);
1327 
1328 err_out:
1329         if (link != NULL) {
1330                 kmem_free(link, strlen(link) + 1);
1331                 link = NULL;
1332         }
1333 
1334         rw_exit(&nddv->sdev_contents);
1335         if (!samedir)
1336                 rw_exit(&oddv->sdev_contents);
1337         return (error);
1338 }
1339 
1340 /*
1341  * Merge sdev_node specific information into an attribute structure.
1342  *
1343  * note: sdev_node is not locked here
1344  */
1345 void
1346 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1347 {
1348         struct vnode *vp = SDEVTOV(dv);
1349 
1350         vap->va_nlink = dv->sdev_nlink;
1351         vap->va_nodeid = dv->sdev_ino;
1352         vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1353         vap->va_type = vp->v_type;
1354 
1355         if (vp->v_type == VDIR) {
1356                 vap->va_rdev = 0;
1357                 vap->va_fsid = vp->v_rdev;
1358         } else if (vp->v_type == VLNK) {
1359                 vap->va_rdev = 0;
1360                 vap->va_mode  &= ~S_IFMT;
1361                 vap->va_mode |= S_IFLNK;
1362         } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1363                 vap->va_rdev = vp->v_rdev;
1364                 vap->va_mode &= ~S_IFMT;
1365                 if (vap->va_type == VCHR)
1366                         vap->va_mode |= S_IFCHR;
1367                 else
1368                         vap->va_mode |= S_IFBLK;
1369         } else {
1370                 vap->va_rdev = 0;
1371         }
1372 }
1373 
1374 struct vattr *
1375 sdev_getdefault_attr(enum vtype type)
1376 {
1377         if (type == VDIR)
1378                 return (&sdev_vattr_dir);
1379         else if (type == VCHR)
1380                 return (&sdev_vattr_chr);
1381         else if (type == VBLK)
1382                 return (&sdev_vattr_blk);
1383         else if (type == VLNK)
1384                 return (&sdev_vattr_lnk);
1385         else
1386                 return (NULL);
1387 }
1388 int
1389 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1390 {
1391         int rv = 0;
1392         struct vnode *vp = SDEVTOV(dv);
1393 
1394         switch (vp->v_type) {
1395         case VCHR:
1396         case VBLK:
1397                 /*
1398                  * If vnode is a device, return special vnode instead
1399                  * (though it knows all about -us- via sp->s_realvp)
1400                  */
1401                 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1402                 VN_RELE(vp);
1403                 if (*vpp == NULLVP)
1404                         rv = ENOSYS;
1405                 break;
1406         default:        /* most types are returned as is */
1407                 *vpp = vp;
1408                 break;
1409         }
1410         return (rv);
1411 }
1412 
1413 /*
1414  * junction between devname and root file system, e.g. ufs
1415  */
1416 int
1417 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1418 {
1419         struct vnode *rdvp = ddv->sdev_attrvp;
1420         int rval = 0;
1421 
1422         ASSERT(rdvp);
1423 
1424         rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1425             NULL);
1426         return (rval);
1427 }
1428 
1429 static int
1430 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1431 {
1432         struct sdev_node *dv = NULL;
1433         char    *nm;
1434         struct vnode *dirvp;
1435         int     error;
1436         vnode_t *vp;
1437         int eof;
1438         struct iovec iov;
1439         struct uio uio;
1440         struct dirent64 *dp;
1441         dirent64_t *dbuf;
1442         size_t dbuflen;
1443         struct vattr vattr;
1444         char *link = NULL;
1445 
1446         if (ddv->sdev_attrvp == NULL)
1447                 return (0);
1448         if (!(ddv->sdev_flags & SDEV_BUILD))
1449                 return (0);
1450 
1451         dirvp = ddv->sdev_attrvp;
1452         VN_HOLD(dirvp);
1453         dbuf = kmem_zalloc(dlen, KM_SLEEP);
1454 
1455         uio.uio_iov = &iov;
1456         uio.uio_iovcnt = 1;
1457         uio.uio_segflg = UIO_SYSSPACE;
1458         uio.uio_fmode = 0;
1459         uio.uio_extflg = UIO_COPY_CACHED;
1460         uio.uio_loffset = 0;
1461         uio.uio_llimit = MAXOFFSET_T;
1462 
1463         eof = 0;
1464         error = 0;
1465         while (!error && !eof) {
1466                 uio.uio_resid = dlen;
1467                 iov.iov_base = (char *)dbuf;
1468                 iov.iov_len = dlen;
1469                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1470                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1471                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1472 
1473                 dbuflen = dlen - uio.uio_resid;
1474                 if (error || dbuflen == 0)
1475                         break;
1476 
1477                 if (!(ddv->sdev_flags & SDEV_BUILD))
1478                         break;
1479 
1480                 for (dp = dbuf; ((intptr_t)dp <
1481                     (intptr_t)dbuf + dbuflen);
1482                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1483                         nm = dp->d_name;
1484 
1485                         if (strcmp(nm, ".") == 0 ||
1486                             strcmp(nm, "..") == 0)
1487                                 continue;
1488 
1489                         vp = NULLVP;
1490                         dv = sdev_cache_lookup(ddv, nm);
1491                         if (dv) {
1492                                 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1493                                 SDEV_SIMPLE_RELE(dv);
1494                                 continue;
1495                         }
1496 
1497                         /* refill the cache if not already */
1498                         error = devname_backstore_lookup(ddv, nm, &vp);
1499                         if (error)
1500                                 continue;
1501 
1502                         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1503                         error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1504                         if (error)
1505                                 continue;
1506 
1507                         if (vattr.va_type == VLNK) {
1508                                 error = sdev_getlink(vp, &link);
1509                                 if (error) {
1510                                         continue;
1511                                 }
1512                                 ASSERT(link != NULL);
1513                         }
1514 
1515                         if (!rw_tryupgrade(&ddv->sdev_contents)) {
1516                                 rw_exit(&ddv->sdev_contents);
1517                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
1518                         }
1519                         error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1520                             cred, SDEV_READY);
1521                         rw_downgrade(&ddv->sdev_contents);
1522 
1523                         if (link != NULL) {
1524                                 kmem_free(link, strlen(link) + 1);
1525                                 link = NULL;
1526                         }
1527 
1528                         if (!error) {
1529                                 ASSERT(dv);
1530                                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1531                                 SDEV_SIMPLE_RELE(dv);
1532                         }
1533                         vp = NULL;
1534                         dv = NULL;
1535                 }
1536         }
1537 
1538 done:
1539         VN_RELE(dirvp);
1540         kmem_free(dbuf, dlen);
1541 
1542         return (error);
1543 }
1544 
1545 void
1546 sdev_filldir_dynamic(struct sdev_node *ddv)
1547 {
1548         int error;
1549         int i;
1550         struct vattr vattr;
1551         struct vattr *vap = &vattr;
1552         char *nm = NULL;
1553         struct sdev_node *dv = NULL;
1554 
1555         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1556         ASSERT((ddv->sdev_flags & SDEV_BUILD));
1557 
1558         *vap = *sdev_getdefault_attr(VDIR);     /* note structure copy here */
1559         gethrestime(&vap->va_atime);
1560         vap->va_mtime = vap->va_atime;
1561         vap->va_ctime = vap->va_atime;
1562         for (i = 0; vtab[i].vt_name != NULL; i++) {
1563                 /*
1564                  * This early, we may be in a read-only /dev environment: leave
1565                  * the creation of any nodes we'd attempt to persist to
1566                  * devfsadm. Because /dev itself is normally persistent, any
1567                  * node which is not marked dynamic will end up being marked
1568                  * persistent. However, some nodes are both dynamic and
1569                  * persistent, mostly lofi and rlofi, so we need to be careful
1570                  * in our check.
1571                  */
1572                 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1573                     !(vtab[i].vt_flags & SDEV_DYNAMIC))
1574                         continue;
1575                 nm = vtab[i].vt_name;
1576                 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1577                 dv = NULL;
1578                 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1579                     NULL, kcred, SDEV_READY);
1580                 if (error) {
1581                         cmn_err(CE_WARN, "%s/%s: error %d\n",
1582                             ddv->sdev_name, nm, error);
1583                 } else {
1584                         ASSERT(dv);
1585                         ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1586                         SDEV_SIMPLE_RELE(dv);
1587                 }
1588         }
1589 }
1590 
1591 /*
1592  * Creating a backing store entry based on sdev_attr.
1593  * This is called either as part of node creation in a persistent directory
1594  * or from setattr/setsecattr to persist access attributes across reboot.
1595  */
1596 int
1597 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1598 {
1599         int error = 0;
1600         struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1601         struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1602         struct vattr *vap = dv->sdev_attr;
1603         char *nm = dv->sdev_name;
1604         struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1605 
1606         ASSERT(dv && dv->sdev_name && rdvp);
1607         ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1608 
1609 lookup:
1610         /* try to find it in the backing store */
1611         error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1612             NULL);
1613         if (error == 0) {
1614                 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1615                         VN_HOLD(rrvp);
1616                         VN_RELE(*rvp);
1617                         *rvp = rrvp;
1618                 }
1619 
1620                 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1621                 dv->sdev_attr = NULL;
1622                 dv->sdev_attrvp = *rvp;
1623                 return (0);
1624         }
1625 
1626         /* let's try to persist the node */
1627         gethrestime(&vap->va_atime);
1628         vap->va_mtime = vap->va_atime;
1629         vap->va_ctime = vap->va_atime;
1630         vap->va_mask |= AT_TYPE|AT_MODE;
1631         switch (vap->va_type) {
1632         case VDIR:
1633                 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1634                 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1635                     (void *)(*rvp), error));
1636                 if (!error)
1637                         VN_RELE(*rvp);
1638                 break;
1639         case VCHR:
1640         case VBLK:
1641         case VREG:
1642         case VDOOR:
1643                 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1644                     rvp, cred, 0, NULL, NULL);
1645                 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1646                     (void *)(*rvp), error));
1647                 if (!error)
1648                         VN_RELE(*rvp);
1649                 break;
1650         case VLNK:
1651                 ASSERT(dv->sdev_symlink);
1652                 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1653                     NULL, 0);
1654                 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1655                     error));
1656                 break;
1657         default:
1658                 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1659                     "create\n", nm);
1660                 /*NOTREACHED*/
1661         }
1662 
1663         /* go back to lookup to factor out spec node and set attrvp */
1664         if (error == 0)
1665                 goto lookup;
1666 
1667         sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1668         return (error);
1669 }
1670 
1671 static void
1672 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1673 {
1674         struct sdev_node *dup = NULL;
1675 
1676         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1677         if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1678                 sdev_direnter(ddv, *dv);
1679         } else {
1680                 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1681                 SDEV_SIMPLE_RELE(*dv);
1682                 sdev_nodedestroy(*dv, 0);
1683                 *dv = dup;
1684         }
1685 }
1686 
1687 static void
1688 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1689 {
1690         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1691         sdev_dirdelete(ddv, *dv);
1692 }
1693 
1694 /*
1695  * update the in-core directory cache
1696  */
1697 void
1698 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1699     sdev_cache_ops_t ops)
1700 {
1701         ASSERT((SDEV_HELD(*dv)));
1702 
1703         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1704         switch (ops) {
1705         case SDEV_CACHE_ADD:
1706                 sdev_cache_add(ddv, dv, nm);
1707                 break;
1708         case SDEV_CACHE_DELETE:
1709                 sdev_cache_delete(ddv, dv);
1710                 break;
1711         default:
1712                 break;
1713         }
1714 }
1715 
1716 /*
1717  * retrieve the named entry from the directory cache
1718  */
1719 struct sdev_node *
1720 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1721 {
1722         struct sdev_node *dv = NULL;
1723 
1724         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1725         dv = sdev_findbyname(ddv, nm);
1726 
1727         return (dv);
1728 }
1729 
1730 /*
1731  * Implicit reconfig for nodes constructed by a link generator
1732  * Start devfsadm if needed, or if devfsadm is in progress,
1733  * prepare to block on devfsadm either completing or
1734  * constructing the desired node.  As devfsadmd is global
1735  * in scope, constructing all necessary nodes, we only
1736  * need to initiate it once.
1737  */
1738 static int
1739 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1740 {
1741         int error = 0;
1742 
1743         if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1744                 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1745                     ddv->sdev_name, nm, devfsadm_state));
1746                 mutex_enter(&dv->sdev_lookup_lock);
1747                 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1748                 mutex_exit(&dv->sdev_lookup_lock);
1749                 error = 0;
1750         } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1751                 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1752                     ddv->sdev_name, nm, devfsadm_state));
1753 
1754                 sdev_devfsadmd_thread(ddv, dv, kcred);
1755                 mutex_enter(&dv->sdev_lookup_lock);
1756                 SDEV_BLOCK_OTHERS(dv,
1757                     (SDEV_LOOKUP | SDEV_LGWAITING));
1758                 mutex_exit(&dv->sdev_lookup_lock);
1759                 error = 0;
1760         } else {
1761                 error = -1;
1762         }
1763 
1764         return (error);
1765 }
1766 
1767 /*
1768  *  Support for specialized device naming construction mechanisms
1769  */
1770 static int
1771 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1772     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1773     void *, char *), int flags, struct cred *cred)
1774 {
1775         int rv = 0;
1776         char *physpath = NULL;
1777         struct vattr vattr;
1778         struct vattr *vap = &vattr;
1779         struct sdev_node *dv = NULL;
1780 
1781         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1782         if (flags & SDEV_VLINK) {
1783                 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1784                 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1785                     NULL);
1786                 if (rv) {
1787                         kmem_free(physpath, MAXPATHLEN);
1788                         return (-1);
1789                 }
1790 
1791                 *vap = *sdev_getdefault_attr(VLNK);     /* structure copy */
1792                 vap->va_size = strlen(physpath);
1793                 gethrestime(&vap->va_atime);
1794                 vap->va_mtime = vap->va_atime;
1795                 vap->va_ctime = vap->va_atime;
1796 
1797                 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1798                     (void *)physpath, cred, SDEV_READY);
1799                 kmem_free(physpath, MAXPATHLEN);
1800                 if (rv)
1801                         return (rv);
1802         } else if (flags & SDEV_VATTR) {
1803                 /*
1804                  * /dev/pts
1805                  *
1806                  * callback is responsible to set the basic attributes,
1807                  * e.g. va_type/va_uid/va_gid/
1808                  *    dev_t if VCHR or VBLK/
1809                  */
1810                 ASSERT(callback);
1811                 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1812                 if (rv) {
1813                         sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1814                             "callback failed \n"));
1815                         return (-1);
1816                 }
1817 
1818                 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1819                     cred, SDEV_READY);
1820 
1821                 if (rv)
1822                         return (rv);
1823 
1824         } else {
1825                 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1826                     SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1827                     __LINE__));
1828                 rv = -1;
1829         }
1830 
1831         *dvp = dv;
1832         return (rv);
1833 }
1834 
/*
 * Report whether exec_name is exactly "devfsadm".
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm, comparing against
 * "devfsadm" captures lookups from both devfsadm and its daemon
 * version.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
        return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
1847 
/*
 * Look up the name nm in the /dev directory ddv and hand the result
 * back through *vpp.
 *
 * Lookup Order:
 *      sdev_node cache;
 *      backing store (SDEV_PERSIST);
 *      DBNR: a. dir_ops implemented in the loadable modules;
 *            b. vnode ops in vtab.
 *
 * Arguments:
 *      ddv             directory to search; must be a VDIR vnode
 *      nm              component name; "", "." and ".." are resolved
 *                      directly without consulting the cache
 *      vpp             out: resulting vnode, or NULL on failure
 *      cred            credentials passed to VOP_GETATTR()/sdev_mknode()
 *      callback, flags handed to sdev_call_dircallback() when the name
 *                      is found neither in the cache nor in the backing
 *                      store; callback may be NULL
 *
 * Returns ENOTDIR if ddv is not a directory, ENOENT if the name cannot
 * be resolved, and otherwise the return value of sdev_to_vp() (or 0 for
 * "", "." and "..", which return a held vnode).
 *
 * Locking: ddv->sdev_contents is entered as reader here and is dropped
 * again before every return that follows.  It is temporarily upgraded
 * to writer around the calls that modify the directory.
 */
int
devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
    struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
    struct cred *, void *, char *), int flags)
{
        int rv = 0, nmlen;
        struct vnode *rvp = NULL;
        struct sdev_node *dv = NULL;
        int     retried = 0;
        int     error = 0;
        struct vattr vattr;
        char *lookup_thread = curproc->p_user.u_comm;
        int failed_flags = 0;
        int (*vtor)(struct sdev_node *) = NULL;
        int state;
        int parent_state;
        char *link = NULL;

        if (SDEVTOV(ddv)->v_type != VDIR)
                return (ENOTDIR);

        /*
         * Empty name or ., return node itself.
         */
        nmlen = strlen(nm);
        if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
                *vpp = SDEVTOV(ddv);
                VN_HOLD(*vpp);
                return (0);
        }

        /*
         * .., return the parent directory
         */
        if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
                *vpp = SDEVTOV(ddv->sdev_dotdot);
                VN_HOLD(*vpp);
                return (0);
        }

        rw_enter(&ddv->sdev_contents, RW_READER);
        /* pick up the directory's validator, if it has one */
        if (ddv->sdev_flags & SDEV_VTOR) {
                vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
                ASSERT(vtor);
        }

        /*
         * Re-entered (with retried set) after sdev_call_devfsadmd()
         * succeeded below.
         */
tryagain:
        /*
         * (a) directory cache lookup:
         */
        ASSERT(RW_READ_HELD(&ddv->sdev_contents));
        parent_state = ddv->sdev_state;
        dv = sdev_cache_lookup(ddv, nm);
        if (dv) {
                state = dv->sdev_state;
                switch (state) {
                case SDEV_INIT:
                        /* lookups issued by devfsadm itself never wait */
                        if (is_devfsadm_thread(lookup_thread))
                                break;

                        /* ZOMBIED parent won't allow node creation */
                        if (parent_state == SDEV_ZOMBIE) {
                                SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                    retried);
                                goto nolock_notfound;
                        }

                        mutex_enter(&dv->sdev_lookup_lock);
                        /* compensate the threads started after devfsadm */
                        if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
                            !(SDEV_IS_LOOKUP(dv)))
                                SDEV_BLOCK_OTHERS(dv,
                                    (SDEV_LOOKUP | SDEV_LGWAITING));

                        if (SDEV_IS_LOOKUP(dv)) {
                                failed_flags |= SLF_REBUILT;
                                /*
                                 * The directory lock is dropped across the
                                 * wait and re-taken as reader afterwards;
                                 * the node state is re-read below.
                                 */
                                rw_exit(&ddv->sdev_contents);
                                error = sdev_wait4lookup(dv, SDEV_LOOKUP);
                                mutex_exit(&dv->sdev_lookup_lock);
                                rw_enter(&ddv->sdev_contents, RW_READER);

                                if (error != 0) {
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        goto nolock_notfound;
                                }

                                state = dv->sdev_state;
                                if (state == SDEV_INIT) {
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        goto nolock_notfound;
                                } else if (state == SDEV_READY) {
                                        goto found;
                                } else if (state == SDEV_ZOMBIE) {
                                        rw_exit(&ddv->sdev_contents);
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        SDEV_RELE(dv);
                                        goto lookup_failed;
                                }
                        } else {
                                mutex_exit(&dv->sdev_lookup_lock);
                        }
                        break;
                case SDEV_READY:
                        goto found;
                case SDEV_ZOMBIE:
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        SDEV_RELE(dv);
                        goto lookup_failed;
                default:
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        *vpp = NULLVP;
                        return (ENOENT);
                }
        }
        ASSERT(RW_READ_HELD(&ddv->sdev_contents));

        /*
         * ZOMBIED parent does not allow new node creation.
         * bail out early
         */
        if (parent_state == SDEV_ZOMBIE) {
                rw_exit(&ddv->sdev_contents);
                *vpp = NULLVP;
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                return (ENOENT);
        }

        /*
         * (b0): backing store lookup
         *      SDEV_PERSIST is default except:
         *              1) pts nodes
         *              2) non-chmod'ed local nodes
         *              3) zvol nodes
         */
        if (SDEV_IS_PERSIST(ddv)) {
                error = devname_backstore_lookup(ddv, nm, &rvp);

                if (!error) {

                        vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
                        error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
                        if (error) {
                                rw_exit(&ddv->sdev_contents);
                                if (dv)
                                        SDEV_RELE(dv);
                                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                                sdev_lookup_failed(ddv, nm, failed_flags);
                                *vpp = NULLVP;
                                return (ENOENT);
                        }

                        /* a symlink also needs its target to build the node */
                        if (vattr.va_type == VLNK) {
                                error = sdev_getlink(rvp, &link);
                                if (error) {
                                        rw_exit(&ddv->sdev_contents);
                                        if (dv)
                                                SDEV_RELE(dv);
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        sdev_lookup_failed(ddv, nm,
                                            failed_flags);
                                        *vpp = NULLVP;
                                        return (ENOENT);
                                }
                                ASSERT(link != NULL);
                        }

                        /*
                         * Become writer for sdev_mknode().  If the in-place
                         * upgrade fails, the lock is released and re-entered
                         * as writer.
                         */
                        if (!rw_tryupgrade(&ddv->sdev_contents)) {
                                rw_exit(&ddv->sdev_contents);
                                rw_enter(&ddv->sdev_contents, RW_WRITER);
                        }
                        error = sdev_mknode(ddv, nm, &dv, &vattr,
                            rvp, link, cred, SDEV_READY);
                        rw_downgrade(&ddv->sdev_contents);

                        /* link is freed here as strlen(link) + 1 bytes */
                        if (link != NULL) {
                                kmem_free(link, strlen(link) + 1);
                                link = NULL;
                        }

                        if (error) {
                                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                                rw_exit(&ddv->sdev_contents);
                                if (dv)
                                        SDEV_RELE(dv);
                                goto lookup_failed;
                        } else {
                                goto found;
                        }
                } else if (retried) {
                        /* second pass after devfsadm and still not there */
                        rw_exit(&ddv->sdev_contents);
                        sdcmn_err3(("retry of lookup of %s/%s: failed\n",
                            ddv->sdev_name, nm));
                        if (dv)
                                SDEV_RELE(dv);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        *vpp = NULLVP;
                        return (ENOENT);
                }
        }

        /*
         * Also the re-entry point for the SDEV_VTOR_STALE case below,
         * which arrives here with dv reset to NULL.
         */
lookup_create_node:
        /* first thread that is doing the lookup on this node */
        if (callback) {
                ASSERT(dv == NULL);
                if (!rw_tryupgrade(&ddv->sdev_contents)) {
                        rw_exit(&ddv->sdev_contents);
                        rw_enter(&ddv->sdev_contents, RW_WRITER);
                }
                error = sdev_call_dircallback(ddv, &dv, nm, callback,
                    flags, cred);
                rw_downgrade(&ddv->sdev_contents);
                if (error == 0) {
                        goto found;
                } else {
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        rw_exit(&ddv->sdev_contents);
                        goto lookup_failed;
                }
        }
        /*
         * No callback and nothing cached: create a node in the SDEV_INIT
         * state.  Only whether dv was produced is checked; the error
         * value from sdev_mknode() is not otherwise examined here.
         */
        if (!dv) {
                if (!rw_tryupgrade(&ddv->sdev_contents)) {
                        rw_exit(&ddv->sdev_contents);
                        rw_enter(&ddv->sdev_contents, RW_WRITER);
                }
                error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
                    cred, SDEV_INIT);
                if (!dv) {
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        *vpp = NULLVP;
                        return (ENOENT);
                }
                rw_downgrade(&ddv->sdev_contents);
        }

        /*
         * (b1) invoking devfsadm once per life time for devfsadm nodes
         */
        ASSERT(SDEV_HELD(dv));

        /* conditions under which devfsadm is not consulted at all */
        if (SDEV_IS_NO_NCACHE(dv))
                failed_flags |= SLF_NO_NCACHE;
        if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
            SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
            ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
                ASSERT(SDEV_HELD(dv));
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        /*
         * filter out known non-existent devices recorded
         * during initial reconfiguration boot for which
         * reconfig should not be done and lookup may
         * be short-circuited now.
         */
        if (sdev_lookup_filter(ddv, nm)) {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        /* bypassing devfsadm internal nodes */
        if (is_devfsadm_thread(lookup_thread)) {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        if (sdev_reconfig_disable) {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        /*
         * Returns 0 when dv was flagged for waiting (devfsadm running or
         * just started), in which case the whole lookup is repeated.
         */
        error = sdev_call_devfsadmd(ddv, dv, nm);
        if (error == 0) {
                sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
                    ddv->sdev_name, nm, curproc->p_user.u_comm));
                if (sdev_reconfig_verbose) {
                        cmn_err(CE_CONT,
                            "?lookup of %s/%s by %s: reconfig\n",
                            ddv->sdev_name, nm, curproc->p_user.u_comm);
                }
                retried = 1;
                failed_flags |= SLF_REBUILT;
                ASSERT(dv->sdev_state != SDEV_ZOMBIE);
                SDEV_SIMPLE_RELE(dv);
                goto tryagain;
        } else {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        /* dv is SDEV_READY; sdev_contents is held as reader */
found:
        ASSERT(dv->sdev_state == SDEV_READY);
        if (vtor) {
                /*
                 * Check validity of returned node
                 */
                switch (vtor(dv)) {
                case SDEV_VTOR_VALID:
                        break;
                case SDEV_VTOR_STALE:
                        /*
                         * The name exists, but the cache entry is
                         * stale and needs to be re-created.
                         */
                        ASSERT(RW_READ_HELD(&ddv->sdev_contents));
                        if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
                                rw_exit(&ddv->sdev_contents);
                                rw_enter(&ddv->sdev_contents, RW_WRITER);
                        }

                        /* a stale directory is emptied before removal */
                        if (SDEVTOV(dv)->v_type == VDIR)
                                (void) sdev_cleandir(dv, NULL, SDEV_ENFORCE);

                        sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
                        rw_downgrade(&ddv->sdev_contents);
                        SDEV_RELE(dv);
                        dv = NULL;
                        /*
                         * Control always leaves through this goto; the
                         * FALLTHRU marker that follows is never reached.
                         */
                        goto lookup_create_node;
                        /* FALLTHRU */
                case SDEV_VTOR_INVALID:
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdcmn_err7(("lookup: destroy invalid "
                            "node: %s(%p)\n", dv->sdev_name, (void *)dv));
                        goto nolock_notfound;
                case SDEV_VTOR_SKIP:
                        sdcmn_err7(("lookup: node not applicable - "
                            "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        SDEV_RELE(dv);
                        goto lookup_failed;
                default:
                        cmn_err(CE_PANIC,
                            "dev fs: validator failed: %s(%p)\n",
                            dv->sdev_name, (void *)dv);
                        break;
                }
        }

        rw_exit(&ddv->sdev_contents);
        rv = sdev_to_vp(dv, vpp);
        /*
         * NOTE(review): this debug message dereferences *vpp regardless
         * of rv - confirm sdev_to_vp() always stores a vnode on failure.
         */
        sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
            "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
            dv->sdev_state, nm, rv));
        return (rv);

        /*
         * Despite the label name, sdev_contents is still held as reader
         * on entry here (see the ASSERT below) and dv is non-NULL.
         */
nolock_notfound:
        /*
         * Destroy the node that is created for synchronization purposes.
         */
        sdcmn_err3(("devname_lookup_func: %s with state %d\n",
            nm, dv->sdev_state));
        ASSERT(RW_READ_HELD(&ddv->sdev_contents));
        if (dv->sdev_state == SDEV_INIT) {
                if (!rw_tryupgrade(&ddv->sdev_contents)) {
                        rw_exit(&ddv->sdev_contents);
                        rw_enter(&ddv->sdev_contents, RW_WRITER);
                }

                /*
                 * Node state may have changed during the lock
                 * changes. Re-check.
                 */
                if (dv->sdev_state == SDEV_INIT) {
                        sdev_dirdelete(ddv, dv);
                        rw_exit(&ddv->sdev_contents);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        SDEV_RELE(dv);
                        *vpp = NULL;
                        return (ENOENT);
                }
        }

        rw_exit(&ddv->sdev_contents);
        SDEV_RELE(dv);

        /* reached with sdev_contents already dropped and dv released */
lookup_failed:
        sdev_lookup_failed(ddv, nm, failed_flags);
        *vpp = NULL;
        return (ENOENT);
}
2246 
2247 /*
2248  * Given a directory node, mark all nodes beneath as
2249  * STALE, i.e. nodes that don't exist as far as new
2250  * consumers are concerned.  Remove them from the
2251  * list of directory entries so that no lookup or
2252  * directory traversal will find them.  The node
2253  * not deallocated so existing holds are not affected.
2254  */
2255 void
2256 sdev_stale(struct sdev_node *ddv)
2257 {
2258         struct sdev_node *dv;
2259         struct vnode *vp;
2260 
2261         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2262 
2263         rw_enter(&ddv->sdev_contents, RW_WRITER);
2264         while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2265                 vp = SDEVTOV(dv);
2266                 SDEV_HOLD(dv);
2267                 if (vp->v_type == VDIR)
2268                         sdev_stale(dv);
2269 
2270                 sdev_dirdelete(ddv, dv);
2271                 SDEV_RELE(dv);
2272         }
2273         ddv->sdev_flags |= SDEV_BUILD;
2274         rw_exit(&ddv->sdev_contents);
2275 }
2276 
2277 /*
2278  * Given a directory node, clean out all the nodes beneath.
2279  * If expr is specified, clean node with names matching expr.
2280  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2281  *      so they are excluded from future lookups.
2282  */
2283 int
2284 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2285 {
2286         int error = 0;
2287         int busy = 0;
2288         struct vnode *vp;
2289         struct sdev_node *dv, *next;
2290         int bkstore = 0;
2291         int len = 0;
2292         char *bks_name = NULL;
2293 
2294         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2295 
2296         /*
2297          * We try our best to destroy all unused sdev_node's
2298          */
2299         rw_enter(&ddv->sdev_contents, RW_WRITER);
2300         for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) {
2301                 next = SDEV_NEXT_ENTRY(ddv, dv);
2302                 vp = SDEVTOV(dv);
2303 
2304                 if (expr && gmatch(dv->sdev_name, expr) == 0)
2305                         continue;
2306 
2307                 if (vp->v_type == VDIR &&
2308                     sdev_cleandir(dv, NULL, flags) != 0) {
2309                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2310                             dv->sdev_name));
2311                         busy++;
2312                         continue;
2313                 }
2314 
2315                 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2316                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2317                             dv->sdev_name));
2318                         busy++;
2319                         continue;
2320                 }
2321 
2322                 /*
2323                  * at this point, either dv is not held or SDEV_ENFORCE
2324                  * is specified. In either case, dv needs to be deleted
2325                  */
2326                 SDEV_HOLD(dv);
2327 
2328                 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2329                 if (bkstore && (vp->v_type == VDIR))
2330                         bkstore += 1;
2331 
2332                 if (bkstore) {
2333                         len = strlen(dv->sdev_name) + 1;
2334                         bks_name = kmem_alloc(len, KM_SLEEP);
2335                         bcopy(dv->sdev_name, bks_name, len);
2336                 }
2337 
2338                 sdev_dirdelete(ddv, dv);
2339 
2340                 /* take care the backing store clean up */
2341                 if (bkstore) {
2342                         ASSERT(bks_name);
2343                         ASSERT(ddv->sdev_attrvp);
2344 
2345                         if (bkstore == 1) {
2346                                 error = VOP_REMOVE(ddv->sdev_attrvp,
2347                                     bks_name, kcred, NULL, 0);
2348                         } else if (bkstore == 2) {
2349                                 error = VOP_RMDIR(ddv->sdev_attrvp,
2350                                     bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2351                         }
2352 
2353                         /* do not propagate the backing store errors */
2354                         if (error) {
2355                                 sdcmn_err9(("sdev_cleandir: backing store"
2356                                     "not cleaned\n"));
2357                                 error = 0;
2358                         }
2359 
2360                         bkstore = 0;
2361                         kmem_free(bks_name, len);
2362                         bks_name = NULL;
2363                         len = 0;
2364                 }
2365 
2366                 ddv->sdev_flags |= SDEV_BUILD;
2367                 SDEV_RELE(dv);
2368         }
2369 
2370         ddv->sdev_flags |= SDEV_BUILD;
2371         rw_exit(&ddv->sdev_contents);
2372 
2373         if (busy) {
2374                 error = EBUSY;
2375         }
2376 
2377         return (error);
2378 }
2379 
2380 /*
2381  * a convenient wrapper for readdir() funcs
2382  */
2383 size_t
2384 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2385 {
2386         size_t reclen = DIRENT64_RECLEN(strlen(nm));
2387         if (reclen > size)
2388                 return (0);
2389 
2390         de->d_ino = (ino64_t)ino;
2391         de->d_off = (off64_t)off + 1;
2392         de->d_reclen = (ushort_t)reclen;
2393         (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2394         return (reclen);
2395 }
2396 
2397 /*
2398  * sdev_mount service routines
2399  */
2400 int
2401 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2402 {
2403         int     error;
2404 
2405         if (uap->datalen != sizeof (*args))
2406                 return (EINVAL);
2407 
2408         if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2409                 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2410                     "get user data. error %d\n", error);
2411                 return (EFAULT);
2412         }
2413 
2414         return (0);
2415 }
2416 
/*
 * Advance from the dirent64 record dp to the address d_reclen bytes
 * past its start.  Any earlier definition of nextdp is discarded first.
 */
#undef nextdp
#define nextdp(dp) \
        ((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2422 
2423 /*
2424  * readdir helper func
2425  */
2426 int
2427 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2428     int flags)
2429 {
2430         struct sdev_node *ddv = VTOSDEV(vp);
2431         struct sdev_node *dv;
2432         dirent64_t      *dp;
2433         ulong_t         outcount = 0;
2434         size_t          namelen;
2435         ulong_t         alloc_count;
2436         void            *outbuf;
2437         struct iovec    *iovp;
2438         int             error = 0;
2439         size_t          reclen;
2440         offset_t        diroff;
2441         offset_t        soff;
2442         int             this_reclen;
2443         int (*vtor)(struct sdev_node *) = NULL;
2444         struct vattr attr;
2445         timestruc_t now;
2446 
2447         ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2448         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2449 
2450         if (uiop->uio_loffset >= MAXOFF_T) {
2451                 if (eofp)
2452                         *eofp = 1;
2453                 return (0);
2454         }
2455 
2456         if (uiop->uio_iovcnt != 1)
2457                 return (EINVAL);
2458 
2459         if (vp->v_type != VDIR)
2460                 return (ENOTDIR);
2461 
2462         if (ddv->sdev_flags & SDEV_VTOR) {
2463                 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2464                 ASSERT(vtor);
2465         }
2466 
2467         if (eofp != NULL)
2468                 *eofp = 0;
2469 
2470         soff = uiop->uio_loffset;
2471         iovp = uiop->uio_iov;
2472         alloc_count = iovp->iov_len;
2473         dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2474         outcount = 0;
2475 
2476         if (ddv->sdev_state == SDEV_ZOMBIE)
2477                 goto get_cache;
2478 
2479         if (SDEV_IS_GLOBAL(ddv)) {
2480 
2481                 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2482                     !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2483                     !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2484                     ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2485                     !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2486                     !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2487                     !sdev_reconfig_disable) {
2488                         /*
2489                          * invoking "devfsadm" to do system device reconfig
2490                          */
2491                         mutex_enter(&ddv->sdev_lookup_lock);
2492                         SDEV_BLOCK_OTHERS(ddv,
2493                             (SDEV_READDIR|SDEV_LGWAITING));
2494                         mutex_exit(&ddv->sdev_lookup_lock);
2495 
2496                         sdcmn_err8(("readdir of %s by %s: reconfig\n",
2497                             ddv->sdev_path, curproc->p_user.u_comm));
2498                         if (sdev_reconfig_verbose) {
2499                                 cmn_err(CE_CONT,
2500                                     "?readdir of %s by %s: reconfig\n",
2501                                     ddv->sdev_path, curproc->p_user.u_comm);
2502                         }
2503 
2504                         sdev_devfsadmd_thread(ddv, NULL, kcred);
2505                 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2506                         /*
2507                          * compensate the "ls" started later than "devfsadm"
2508                          */
2509                         mutex_enter(&ddv->sdev_lookup_lock);
2510                         SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2511                         mutex_exit(&ddv->sdev_lookup_lock);
2512                 }
2513 
2514                 /*
2515                  * release the contents lock so that
2516                  * the cache may be updated by devfsadmd
2517                  */
2518                 rw_exit(&ddv->sdev_contents);
2519                 mutex_enter(&ddv->sdev_lookup_lock);
2520                 if (SDEV_IS_READDIR(ddv))
2521                         (void) sdev_wait4lookup(ddv, SDEV_READDIR);
2522                 mutex_exit(&ddv->sdev_lookup_lock);
2523                 rw_enter(&ddv->sdev_contents, RW_READER);
2524 
2525                 sdcmn_err4(("readdir of directory %s by %s\n",
2526                     ddv->sdev_name, curproc->p_user.u_comm));
2527                 if (ddv->sdev_flags & SDEV_BUILD) {
2528                         if (SDEV_IS_PERSIST(ddv)) {
2529                                 error = sdev_filldir_from_store(ddv,
2530                                     alloc_count, cred);
2531                         }
2532                         ddv->sdev_flags &= ~SDEV_BUILD;
2533                 }
2534         }
2535 
2536 get_cache:
2537         /* handle "." and ".." */
2538         diroff = 0;
2539         if (soff == 0) {
2540                 /* first time */
2541                 this_reclen = DIRENT64_RECLEN(1);
2542                 if (alloc_count < this_reclen) {
2543                         error = EINVAL;
2544                         goto done;
2545                 }
2546 
2547                 dp->d_ino = (ino64_t)ddv->sdev_ino;
2548                 dp->d_off = (off64_t)1;
2549                 dp->d_reclen = (ushort_t)this_reclen;
2550 
2551                 (void) strncpy(dp->d_name, ".",
2552                     DIRENT64_NAMELEN(this_reclen));
2553                 outcount += dp->d_reclen;
2554                 dp = nextdp(dp);
2555         }
2556 
2557         diroff++;
2558         if (soff <= 1) {
2559                 this_reclen = DIRENT64_RECLEN(2);
2560                 if (alloc_count < outcount + this_reclen) {
2561                         error = EINVAL;
2562                         goto done;
2563                 }
2564 
2565                 dp->d_reclen = (ushort_t)this_reclen;
2566                 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2567                 dp->d_off = (off64_t)2;
2568 
2569                 (void) strncpy(dp->d_name, "..",
2570                     DIRENT64_NAMELEN(this_reclen));
2571                 outcount += dp->d_reclen;
2572 
2573                 dp = nextdp(dp);
2574         }
2575 
2576 
2577         /* gets the cache */
2578         diroff++;
2579         for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2580             dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2581                 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2582                     diroff, soff, dv->sdev_name));
2583 
2584                 /* bypassing pre-matured nodes */
2585                 if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2586                         sdcmn_err3(("sdev_readdir: pre-mature node  "
2587                             "%s %d\n", dv->sdev_name, dv->sdev_state));
2588                         continue;
2589                 }
2590 
2591                 /*
2592                  * Check validity of node
2593                  * Drop invalid and nodes to be skipped.
2594                  * A node the validator indicates as stale needs
2595                  * to be returned as presumably the node name itself
2596                  * is valid and the node data itself will be refreshed
2597                  * on lookup.  An application performing a readdir then
2598                  * stat on each entry should thus always see consistent
2599                  * data.  In any case, it is not possible to synchronize
2600                  * with dynamic kernel state, and any view we return can
2601                  * never be anything more than a snapshot at a point in time.
2602                  */
2603                 if (vtor) {
2604                         switch (vtor(dv)) {
2605                         case SDEV_VTOR_VALID:
2606                                 break;
2607                         case SDEV_VTOR_INVALID:
2608                         case SDEV_VTOR_SKIP:
2609                                 continue;
2610                         case SDEV_VTOR_STALE:
2611                                 sdcmn_err3(("sdev_readir: %s stale\n",
2612                                     dv->sdev_name));
2613                                 break;
2614                         default:
2615                                 cmn_err(CE_PANIC,
2616                                     "dev fs: validator failed: %s(%p)\n",
2617                                     dv->sdev_name, (void *)dv);
2618                                 break;
2619                         /*NOTREACHED*/
2620                         }
2621                 }
2622 
2623                 namelen = strlen(dv->sdev_name);
2624                 reclen = DIRENT64_RECLEN(namelen);
2625                 if (outcount + reclen > alloc_count) {
2626                         goto full;
2627                 }
2628                 dp->d_reclen = (ushort_t)reclen;
2629                 dp->d_ino = (ino64_t)dv->sdev_ino;
2630                 dp->d_off = (off64_t)diroff + 1;
2631                 (void) strncpy(dp->d_name, dv->sdev_name,
2632                     DIRENT64_NAMELEN(reclen));
2633                 outcount += reclen;
2634                 dp = nextdp(dp);
2635         }
2636 
2637 full:
2638         sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2639             "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2640             (void *)dv));
2641 
2642         if (outcount)
2643                 error = uiomove(outbuf, outcount, UIO_READ, uiop);
2644 
2645         if (!error) {
2646                 uiop->uio_loffset = diroff;
2647                 if (eofp)
2648                         *eofp = dv ? 0 : 1;
2649         }
2650 
2651 
2652         if (ddv->sdev_attrvp) {
2653                 gethrestime(&now);
2654                 attr.va_ctime = now;
2655                 attr.va_atime = now;
2656                 attr.va_mask = AT_CTIME|AT_ATIME;
2657 
2658                 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2659         }
2660 done:
2661         kmem_free(outbuf, alloc_count);
2662         return (error);
2663 }
2664 
2665 static int
2666 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2667 {
2668         vnode_t *vp;
2669         vnode_t *cvp;
2670         struct sdev_node *svp;
2671         char *nm;
2672         struct pathname pn;
2673         int error;
2674         int persisted = 0;
2675 
2676         ASSERT(INGLOBALZONE(curproc));
2677 
2678         if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2679                 return (error);
2680         nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2681 
2682         vp = rootdir;
2683         VN_HOLD(vp);
2684 
2685         while (pn_pathleft(&pn)) {
2686                 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2687                 (void) pn_getcomponent(&pn, nm);
2688 
2689                 /*
2690                  * Deal with the .. special case where we may be
2691                  * traversing up across a mount point, to the
2692                  * root of this filesystem or global root.
2693                  */
2694                 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2695 checkforroot:
2696                         if (VN_CMP(vp, rootdir)) {
2697                                 nm[1] = 0;
2698                         } else if (vp->v_flag & VROOT) {
2699                                 vfs_t *vfsp;
2700                                 cvp = vp;
2701                                 vfsp = cvp->v_vfsp;
2702                                 vfs_rlock_wait(vfsp);
2703                                 vp = cvp->v_vfsp->vfs_vnodecovered;
2704                                 if (vp == NULL ||
2705                                     (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2706                                         vfs_unlock(vfsp);
2707                                         VN_RELE(cvp);
2708                                         error = EIO;
2709                                         break;
2710                                 }
2711                                 VN_HOLD(vp);
2712                                 vfs_unlock(vfsp);
2713                                 VN_RELE(cvp);
2714                                 cvp = NULL;
2715                                 goto checkforroot;
2716                         }
2717                 }
2718 
2719                 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2720                     NULL, NULL);
2721                 if (error) {
2722                         VN_RELE(vp);
2723                         break;
2724                 }
2725 
2726                 /* traverse mount points encountered on our journey */
2727                 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2728                         VN_RELE(vp);
2729                         VN_RELE(cvp);
2730                         break;
2731                 }
2732 
2733                 /*
2734                  * symbolic link, can be either relative and absolute
2735                  */
2736                 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2737                         struct pathname linkpath;
2738                         pn_alloc(&linkpath);
2739                         if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2740                                 pn_free(&linkpath);
2741                                 break;
2742                         }
2743                         if (pn_pathleft(&linkpath) == 0)
2744                                 (void) pn_set(&linkpath, ".");
2745                         error = pn_insert(&pn, &linkpath, strlen(nm));
2746                         pn_free(&linkpath);
2747                         if (pn.pn_pathlen == 0) {
2748                                 VN_RELE(vp);
2749                                 return (ENOENT);
2750                         }
2751                         if (pn.pn_path[0] == '/') {
2752                                 pn_skipslash(&pn);
2753                                 VN_RELE(vp);
2754                                 VN_RELE(cvp);
2755                                 vp = rootdir;
2756                                 VN_HOLD(vp);
2757                         } else {
2758                                 VN_RELE(cvp);
2759                         }
2760                         continue;
2761                 }
2762 
2763                 VN_RELE(vp);
2764 
2765                 /*
2766                  * Direct the operation to the persisting filesystem
2767                  * underlying /dev.  Bail if we encounter a
2768                  * non-persistent dev entity here.
2769                  */
2770                 if (cvp->v_vfsp->vfs_fstype == devtype) {
2771 
2772                         if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2773                                 error = ENOENT;
2774                                 VN_RELE(cvp);
2775                                 break;
2776                         }
2777 
2778                         if (VTOSDEV(cvp) == NULL) {
2779                                 error = ENOENT;
2780                                 VN_RELE(cvp);
2781                                 break;
2782                         }
2783                         svp = VTOSDEV(cvp);
2784                         if ((vp = svp->sdev_attrvp) == NULL) {
2785                                 error = ENOENT;
2786                                 VN_RELE(cvp);
2787                                 break;
2788                         }
2789                         persisted = 1;
2790                         VN_HOLD(vp);
2791                         VN_RELE(cvp);
2792                         cvp = vp;
2793                 }
2794 
2795                 vp = cvp;
2796                 pn_skipslash(&pn);
2797         }
2798 
2799         kmem_free(nm, MAXNAMELEN);
2800         pn_free(&pn);
2801 
2802         if (error)
2803                 return (error);
2804 
2805         /*
2806          * Only return persisted nodes in the filesystem underlying /dev.
2807          */
2808         if (!persisted) {
2809                 VN_RELE(vp);
2810                 return (ENOENT);
2811         }
2812 
2813         *r_vp = vp;
2814         return (0);
2815 }
2816 
2817 int
2818 sdev_modctl_readdir(const char *dir, char ***dirlistp, int *npathsp,
2819     int *npathsp_alloc, int checking_empty)
2820 {
2821         char    **pathlist = NULL;
2822         char    **newlist = NULL;
2823         int     npaths = 0;
2824         int     npaths_alloc = 0;
2825         dirent64_t *dbuf = NULL;
2826         int     n;
2827         char    *s;
2828         int error;
2829         vnode_t *vp;
2830         int eof;
2831         struct iovec iov;
2832         struct uio uio;
2833         struct dirent64 *dp;
2834         size_t dlen;
2835         size_t dbuflen;
2836         int ndirents = 64;
2837         char *nm;
2838 
2839         error = sdev_modctl_lookup(dir, &vp);
2840         sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2841             dir, curproc->p_user.u_comm,
2842             (error == 0) ? "ok" : "failed"));
2843         if (error)
2844                 return (error);
2845 
2846         dlen = ndirents * (sizeof (*dbuf));
2847         dbuf = kmem_alloc(dlen, KM_SLEEP);
2848 
2849         uio.uio_iov = &iov;
2850         uio.uio_iovcnt = 1;
2851         uio.uio_segflg = UIO_SYSSPACE;
2852         uio.uio_fmode = 0;
2853         uio.uio_extflg = UIO_COPY_CACHED;
2854         uio.uio_loffset = 0;
2855         uio.uio_llimit = MAXOFFSET_T;
2856 
2857         eof = 0;
2858         error = 0;
2859         while (!error && !eof) {
2860                 uio.uio_resid = dlen;
2861                 iov.iov_base = (char *)dbuf;
2862                 iov.iov_len = dlen;
2863 
2864                 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2865                 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2866                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2867 
2868                 dbuflen = dlen - uio.uio_resid;
2869 
2870                 if (error || dbuflen == 0)
2871                         break;
2872 
2873                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2874                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2875 
2876                         nm = dp->d_name;
2877 
2878                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2879                                 continue;
2880                         if (npaths == npaths_alloc) {
2881                                 npaths_alloc += 64;
2882                                 newlist = (char **)
2883                                     kmem_zalloc((npaths_alloc + 1) *
2884                                     sizeof (char *), KM_SLEEP);
2885                                 if (pathlist) {
2886                                         bcopy(pathlist, newlist,
2887                                             npaths * sizeof (char *));
2888                                         kmem_free(pathlist,
2889                                             (npaths + 1) * sizeof (char *));
2890                                 }
2891                                 pathlist = newlist;
2892                         }
2893                         n = strlen(nm) + 1;
2894                         s = kmem_alloc(n, KM_SLEEP);
2895                         bcopy(nm, s, n);
2896                         pathlist[npaths++] = s;
2897                         sdcmn_err11(("  %s/%s\n", dir, s));
2898 
2899                         /* if checking empty, one entry is as good as many */
2900                         if (checking_empty) {
2901                                 eof = 1;
2902                                 break;
2903                         }
2904                 }
2905         }
2906 
2907 exit:
2908         VN_RELE(vp);
2909 
2910         if (dbuf)
2911                 kmem_free(dbuf, dlen);
2912 
2913         if (error)
2914                 return (error);
2915 
2916         *dirlistp = pathlist;
2917         *npathsp = npaths;
2918         *npathsp_alloc = npaths_alloc;
2919 
2920         return (0);
2921 }
2922 
/*
 * Free a name list returned by sdev_modctl_readdir().
 *
 * pathlist holds npaths strings, each freed with its strlen + 1 size,
 * in an array of npaths_alloc + 1 pointers.  sdev_modctl_readdir()
 * returns a NULL list (with both counts zero) for a directory that has
 * no entries; that case is accepted and is a no-op, since handing a NULL
 * pointer with a non-zero size to kmem_free() is not valid.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
        int     i;

        if (pathlist == NULL)
                return;

        for (i = 0; i < npaths; i++)
                kmem_free(pathlist[i], strlen(pathlist[i]) + 1);

        kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2935 
2936 int
2937 sdev_modctl_devexists(const char *path)
2938 {
2939         vnode_t *vp;
2940         int error;
2941 
2942         error = sdev_modctl_lookup(path, &vp);
2943         sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2944             path, curproc->p_user.u_comm,
2945             (error == 0) ? "ok" : "failed"));
2946         if (error == 0)
2947                 VN_RELE(vp);
2948 
2949         return (error);
2950 }
2951 
2952 extern int sdev_vnodeops_tbl_size;
2953 
2954 /*
2955  * construct a new template with overrides from vtab
2956  */
2957 static fs_operation_def_t *
2958 sdev_merge_vtab(const fs_operation_def_t tab[])
2959 {
2960         fs_operation_def_t *new;
2961         const fs_operation_def_t *tab_entry;
2962 
2963         /* make a copy of standard vnode ops table */
2964         new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2965         bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2966 
2967         /* replace the overrides from tab */
2968         for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2969                 fs_operation_def_t *std_entry = new;
2970                 while (std_entry->name) {
2971                         if (strcmp(tab_entry->name, std_entry->name) == 0) {
2972                                 std_entry->func = tab_entry->func;
2973                                 break;
2974                         }
2975                         std_entry++;
2976                 }
2977                 if (std_entry->name == NULL)
2978                         cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2979                             tab_entry->name);
2980         }
2981 
2982         return (new);
2983 }
2984 
2985 /* free memory allocated by sdev_merge_vtab */
2986 static void
2987 sdev_free_vtab(fs_operation_def_t *new)
2988 {
2989         kmem_free(new, sdev_vnodeops_tbl_size);
2990 }
2991 
2992 /*
2993  * a generic setattr() function
2994  *
2995  * note: flags only supports AT_UID and AT_GID.
2996  *       Future enhancements can be done for other types, e.g. AT_MODE
2997  */
2998 int
2999 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3000     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3001     int), int protocol)
3002 {
3003         struct sdev_node        *dv = VTOSDEV(vp);
3004         struct sdev_node        *parent = dv->sdev_dotdot;
3005         struct vattr            *get;
3006         uint_t                  mask = vap->va_mask;
3007         int                     error;
3008 
3009         /* some sanity checks */
3010         if (vap->va_mask & AT_NOSET)
3011                 return (EINVAL);
3012 
3013         if (vap->va_mask & AT_SIZE) {
3014                 if (vp->v_type == VDIR) {
3015                         return (EISDIR);
3016                 }
3017         }
3018 
3019         /* no need to set attribute, but do not fail either */
3020         ASSERT(parent);
3021         rw_enter(&parent->sdev_contents, RW_READER);
3022         if (dv->sdev_state == SDEV_ZOMBIE) {
3023                 rw_exit(&parent->sdev_contents);
3024                 return (0);
3025         }
3026 
3027         /* If backing store exists, just set it. */
3028         if (dv->sdev_attrvp) {
3029                 rw_exit(&parent->sdev_contents);
3030                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3031         }
3032 
3033         /*
3034          * Otherwise, for nodes with the persistence attribute, create it.
3035          */
3036         ASSERT(dv->sdev_attr);
3037         if (SDEV_IS_PERSIST(dv) ||
3038             ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3039                 sdev_vattr_merge(dv, vap);
3040                 rw_enter(&dv->sdev_contents, RW_WRITER);
3041                 error = sdev_shadow_node(dv, cred);
3042                 rw_exit(&dv->sdev_contents);
3043                 rw_exit(&parent->sdev_contents);
3044 
3045                 if (error)
3046                         return (error);
3047                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3048         }
3049 
3050 
3051         /*
3052          * sdev_attr was allocated in sdev_mknode
3053          */
3054         rw_enter(&dv->sdev_contents, RW_WRITER);
3055         error = secpolicy_vnode_setattr(cred, vp, vap,
3056             dv->sdev_attr, flags, sdev_unlocked_access, dv);
3057         if (error) {
3058                 rw_exit(&dv->sdev_contents);
3059                 rw_exit(&parent->sdev_contents);
3060                 return (error);
3061         }
3062 
3063         get = dv->sdev_attr;
3064         if (mask & AT_MODE) {
3065                 get->va_mode &= S_IFMT;
3066                 get->va_mode |= vap->va_mode & ~S_IFMT;
3067         }
3068 
3069         if ((mask & AT_UID) || (mask & AT_GID)) {
3070                 if (mask & AT_UID)
3071                         get->va_uid = vap->va_uid;
3072                 if (mask & AT_GID)
3073                         get->va_gid = vap->va_gid;
3074                 /*
3075                  * a callback must be provided if the protocol is set
3076                  */
3077                 if ((protocol & AT_UID) || (protocol & AT_GID)) {
3078                         ASSERT(callback);
3079                         error = callback(dv, get, protocol);
3080                         if (error) {
3081                                 rw_exit(&dv->sdev_contents);
3082                                 rw_exit(&parent->sdev_contents);
3083                                 return (error);
3084                         }
3085                 }
3086         }
3087 
3088         if (mask & AT_ATIME)
3089                 get->va_atime = vap->va_atime;
3090         if (mask & AT_MTIME)
3091                 get->va_mtime = vap->va_mtime;
3092         if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3093                 gethrestime(&get->va_ctime);
3094         }
3095 
3096         sdev_vattr_merge(dv, get);
3097         rw_exit(&dv->sdev_contents);
3098         rw_exit(&parent->sdev_contents);
3099         return (0);
3100 }
3101 
3102 /*
3103  * a generic inactive() function
3104  */
3105 /*ARGSUSED*/
3106 void
3107 devname_inactive_func(struct vnode *vp, struct cred *cred,
3108     void (*callback)(struct vnode *))
3109 {
3110         int clean;
3111         struct sdev_node *dv = VTOSDEV(vp);
3112         int state;
3113 
3114         mutex_enter(&vp->v_lock);
3115         ASSERT(vp->v_count >= 1);
3116 
3117 
3118         if (vp->v_count == 1 && callback != NULL)
3119                 callback(vp);
3120 
3121         rw_enter(&dv->sdev_contents, RW_WRITER);
3122         state = dv->sdev_state;
3123 
3124         clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3125 
3126         /*
3127          * sdev is a rather bad public citizen. It violates the general
3128          * agreement that in memory nodes should always have a valid reference
3129          * count on their vnode. But that's not the case here. This means that
3130          * we do actually have to distinguish between getting inactive callbacks
3131          * for zombies and otherwise. This should probably be fixed.
3132          */
3133         if (clean) {
3134                 /* Remove the . entry to ourselves */
3135                 if (vp->v_type == VDIR) {
3136                         decr_link(dv);
3137                 }
3138                 VERIFY(dv->sdev_nlink == 1);
3139                 decr_link(dv);
3140                 VN_RELE_LOCKED(vp);
3141                 rw_exit(&dv->sdev_contents);
3142                 mutex_exit(&vp->v_lock);
3143                 sdev_nodedestroy(dv, 0);
3144         } else {
3145                 VN_RELE_LOCKED(vp);
3146                 rw_exit(&dv->sdev_contents);
3147                 mutex_exit(&vp->v_lock);
3148         }
3149 }