1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
  24  */
  25 
  26 /*
  27  * utility routines for the /dev fs
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/t_lock.h>
  33 #include <sys/systm.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/user.h>
  36 #include <sys/time.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/file.h>
  40 #include <sys/fcntl.h>
  41 #include <sys/flock.h>
  42 #include <sys/kmem.h>
  43 #include <sys/uio.h>
  44 #include <sys/errno.h>
  45 #include <sys/stat.h>
  46 #include <sys/cred.h>
  47 #include <sys/dirent.h>
  48 #include <sys/pathname.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/debug.h>
  51 #include <sys/mode.h>
  52 #include <sys/policy.h>
  53 #include <fs/fs_subr.h>
  54 #include <sys/mount.h>
  55 #include <sys/fs/snode.h>
  56 #include <sys/fs/dv_node.h>
  57 #include <sys/fs/sdev_impl.h>
  58 #include <sys/sunndi.h>
  59 #include <sys/sunmdi.h>
  60 #include <sys/conf.h>
  61 #include <sys/proc.h>
  62 #include <sys/user.h>
  63 #include <sys/modctl.h>
  64 
  65 #ifdef DEBUG
  66 int sdev_debug = 0x00000001;
  67 int sdev_debug_cache_flags = 0;
  68 #endif
  69 
  70 /*
  71  * globals
  72  */
  73 /* prototype memory vattrs */
  74 vattr_t sdev_vattr_dir = {
  75         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
  76         VDIR,                                   /* va_type */
  77         SDEV_DIRMODE_DEFAULT,                   /* va_mode */
  78         SDEV_UID_DEFAULT,                       /* va_uid */
  79         SDEV_GID_DEFAULT,                       /* va_gid */
  80         0,                                      /* va_fsid */
  81         0,                                      /* va_nodeid */
  82         0,                                      /* va_nlink */
  83         0,                                      /* va_size */
  84         0,                                      /* va_atime */
  85         0,                                      /* va_mtime */
  86         0,                                      /* va_ctime */
  87         0,                                      /* va_rdev */
  88         0,                                      /* va_blksize */
  89         0,                                      /* va_nblocks */
  90         0                                       /* va_vcode */
  91 };
  92 
  93 vattr_t sdev_vattr_lnk = {
  94         AT_TYPE|AT_MODE,                        /* va_mask */
  95         VLNK,                                   /* va_type */
  96         SDEV_LNKMODE_DEFAULT,                   /* va_mode */
  97         SDEV_UID_DEFAULT,                       /* va_uid */
  98         SDEV_GID_DEFAULT,                       /* va_gid */
  99         0,                                      /* va_fsid */
 100         0,                                      /* va_nodeid */
 101         0,                                      /* va_nlink */
 102         0,                                      /* va_size */
 103         0,                                      /* va_atime */
 104         0,                                      /* va_mtime */
 105         0,                                      /* va_ctime */
 106         0,                                      /* va_rdev */
 107         0,                                      /* va_blksize */
 108         0,                                      /* va_nblocks */
 109         0                                       /* va_vcode */
 110 };
 111 
 112 vattr_t sdev_vattr_blk = {
 113         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 114         VBLK,                                   /* va_type */
 115         S_IFBLK | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 116         SDEV_UID_DEFAULT,                       /* va_uid */
 117         SDEV_GID_DEFAULT,                       /* va_gid */
 118         0,                                      /* va_fsid */
 119         0,                                      /* va_nodeid */
 120         0,                                      /* va_nlink */
 121         0,                                      /* va_size */
 122         0,                                      /* va_atime */
 123         0,                                      /* va_mtime */
 124         0,                                      /* va_ctime */
 125         0,                                      /* va_rdev */
 126         0,                                      /* va_blksize */
 127         0,                                      /* va_nblocks */
 128         0                                       /* va_vcode */
 129 };
 130 
 131 vattr_t sdev_vattr_chr = {
 132         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 133         VCHR,                                   /* va_type */
 134         S_IFCHR | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 135         SDEV_UID_DEFAULT,                       /* va_uid */
 136         SDEV_GID_DEFAULT,                       /* va_gid */
 137         0,                                      /* va_fsid */
 138         0,                                      /* va_nodeid */
 139         0,                                      /* va_nlink */
 140         0,                                      /* va_size */
 141         0,                                      /* va_atime */
 142         0,                                      /* va_mtime */
 143         0,                                      /* va_ctime */
 144         0,                                      /* va_rdev */
 145         0,                                      /* va_blksize */
 146         0,                                      /* va_nblocks */
 147         0                                       /* va_vcode */
 148 };
 149 
 150 kmem_cache_t    *sdev_node_cache;       /* sdev_node cache */
 151 int             devtype;                /* fstype */
 152 
 153 static void
 154 sdev_prof_free(struct sdev_node *dv)
 155 {
 156         ASSERT(!SDEV_IS_GLOBAL(dv));
 157         nvlist_free(dv->sdev_prof.dev_name);
 158         nvlist_free(dv->sdev_prof.dev_map);
 159         nvlist_free(dv->sdev_prof.dev_symlink);
 160         nvlist_free(dv->sdev_prof.dev_glob_incdir);
 161         nvlist_free(dv->sdev_prof.dev_glob_excdir);
 162         bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 163 }
 164 
 165 /* sdev_node cache constructor */
 166 /*ARGSUSED1*/
 167 static int
 168 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
 169 {
 170         struct sdev_node *dv = (struct sdev_node *)buf;
 171         struct vnode *vp;
 172 
 173         bzero(buf, sizeof (struct sdev_node));
 174         vp = dv->sdev_vnode = vn_alloc(flag);
 175         if (vp == NULL) {
 176                 return (-1);
 177         }
 178         vp->v_data = dv;
 179         rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
 180         return (0);
 181 }
 182 
 183 /* sdev_node cache destructor */
 184 /*ARGSUSED1*/
 185 static void
 186 i_sdev_node_dtor(void *buf, void *arg)
 187 {
 188         struct sdev_node *dv = (struct sdev_node *)buf;
 189         struct vnode *vp = SDEVTOV(dv);
 190 
 191         rw_destroy(&dv->sdev_contents);
 192         vn_free(vp);
 193 }
 194 
 195 /* initialize sdev_node cache */
 196 void
 197 sdev_node_cache_init()
 198 {
 199         int flags = 0;
 200 
 201 #ifdef  DEBUG
 202         flags = sdev_debug_cache_flags;
 203         if (flags)
 204                 sdcmn_err(("cache debug flags 0x%x\n", flags));
 205 #endif  /* DEBUG */
 206 
 207         ASSERT(sdev_node_cache == NULL);
 208         sdev_node_cache = kmem_cache_create("sdev_node_cache",
 209             sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
 210             NULL, NULL, NULL, flags);
 211 }
 212 
 213 /* destroy sdev_node cache */
 214 void
 215 sdev_node_cache_fini()
 216 {
 217         ASSERT(sdev_node_cache != NULL);
 218         kmem_cache_destroy(sdev_node_cache);
 219         sdev_node_cache = NULL;
 220 }
 221 
 222 /*
 223  * Compare two nodes lexographically to balance avl tree
 224  */
 225 static int
 226 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
 227 {
 228         int rv;
 229         if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
 230                 return (0);
 231         return ((rv < 0) ? -1 : 1);
 232 }
 233 
 234 void
 235 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
 236 {
 237         ASSERT(dv);
 238         ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
 239         dv->sdev_state = state;
 240 }
 241 
 242 static void
 243 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
 244 {
 245         timestruc_t     now;
 246         struct vattr    *attrp;
 247         uint_t          mask;
 248 
 249         ASSERT(dv->sdev_attr);
 250         ASSERT(vap);
 251 
 252         attrp = dv->sdev_attr;
 253         mask = vap->va_mask;
 254         if (mask & AT_TYPE)
 255                 attrp->va_type = vap->va_type;
 256         if (mask & AT_MODE)
 257                 attrp->va_mode = vap->va_mode;
 258         if (mask & AT_UID)
 259                 attrp->va_uid = vap->va_uid;
 260         if (mask & AT_GID)
 261                 attrp->va_gid = vap->va_gid;
 262         if (mask & AT_RDEV)
 263                 attrp->va_rdev = vap->va_rdev;
 264 
 265         gethrestime(&now);
 266         attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
 267         attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
 268         attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
 269 }
 270 
 271 static void
 272 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
 273 {
 274         ASSERT(dv->sdev_attr == NULL);
 275         ASSERT(vap->va_mask & AT_TYPE);
 276         ASSERT(vap->va_mask & AT_MODE);
 277 
 278         dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
 279         sdev_attr_update(dv, vap);
 280 }
 281 
 282 /* alloc and initialize a sdev_node */
 283 int
 284 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 285     vattr_t *vap)
 286 {
 287         struct sdev_node *dv = NULL;
 288         struct vnode *vp;
 289         size_t nmlen, len;
 290         devname_handle_t  *dhl;
 291 
 292         nmlen = strlen(nm) + 1;
 293         if (nmlen > MAXNAMELEN) {
 294                 sdcmn_err9(("sdev_nodeinit: node name %s"
 295                     " too long\n", nm));
 296                 *newdv = NULL;
 297                 return (ENAMETOOLONG);
 298         }
 299 
 300         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 301 
 302         dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
 303         bcopy(nm, dv->sdev_name, nmlen);
 304         dv->sdev_namelen = nmlen - 1;        /* '\0' not included */
 305         len = strlen(ddv->sdev_path) + strlen(nm) + 2;
 306         dv->sdev_path = kmem_alloc(len, KM_SLEEP);
 307         (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
 308         /* overwritten for VLNK nodes */
 309         dv->sdev_symlink = NULL;
 310         list_link_init(&dv->sdev_plist);
 311 
 312         vp = SDEVTOV(dv);
 313         vn_reinit(vp);
 314         vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
 315         if (vap)
 316                 vp->v_type = vap->va_type;
 317 
 318         /*
 319          * initialized to the parent's vnodeops.
 320          * maybe overwriten for a VDIR
 321          */
 322         vn_setops(vp, vn_getops(SDEVTOV(ddv)));
 323         vn_exists(vp);
 324 
 325         dv->sdev_dotdot = NULL;
 326         dv->sdev_attrvp = NULL;
 327         if (vap) {
 328                 sdev_attr_alloc(dv, vap);
 329         } else {
 330                 dv->sdev_attr = NULL;
 331         }
 332 
 333         dv->sdev_ino = sdev_mkino(dv);
 334         dv->sdev_nlink = 0;          /* updated on insert */
 335         dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
 336         dv->sdev_flags |= SDEV_BUILD;
 337         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 338         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 339         if (SDEV_IS_GLOBAL(ddv)) {
 340                 dv->sdev_flags |= SDEV_GLOBAL;
 341                 dhl = &(dv->sdev_handle);
 342                 dhl->dh_data = dv;
 343                 dhl->dh_args = NULL;
 344                 sdev_set_no_negcache(dv);
 345                 dv->sdev_gdir_gen = 0;
 346         } else {
 347                 dv->sdev_flags &= ~SDEV_GLOBAL;
 348                 dv->sdev_origin = NULL; /* set later */
 349                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 350                 dv->sdev_ldir_gen = 0;
 351                 dv->sdev_devtree_gen = 0;
 352         }
 353 
 354         rw_enter(&dv->sdev_contents, RW_WRITER);
 355         sdev_set_nodestate(dv, SDEV_INIT);
 356         rw_exit(&dv->sdev_contents);
 357         *newdv = dv;
 358 
 359         return (0);
 360 }
 361 
 362 /*
 363  * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
 364  * caller to transition the node to the SDEV_ZOMBIE state.
 365  */
 366 int
 367 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
 368     void *args, struct cred *cred)
 369 {
 370         int error = 0;
 371         struct vnode *vp = SDEVTOV(dv);
 372         vtype_t type;
 373 
 374         ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
 375 
 376         type = vap->va_type;
 377         vp->v_type = type;
 378         vp->v_rdev = vap->va_rdev;
 379         rw_enter(&dv->sdev_contents, RW_WRITER);
 380         if (type == VDIR) {
 381                 dv->sdev_nlink = 2;
 382                 dv->sdev_flags &= ~SDEV_PERSIST;
 383                 dv->sdev_flags &= ~SDEV_DYNAMIC;
 384                 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
 385                 ASSERT(dv->sdev_dotdot);
 386                 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
 387                 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
 388                 avl_create(&dv->sdev_entries,
 389                     (int (*)(const void *, const void *))sdev_compare_nodes,
 390                     sizeof (struct sdev_node),
 391                     offsetof(struct sdev_node, sdev_avllink));
 392         } else if (type == VLNK) {
 393                 ASSERT(args);
 394                 dv->sdev_nlink = 1;
 395                 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
 396         } else {
 397                 dv->sdev_nlink = 1;
 398         }
 399         sdev_plugin_nodeready(dv);
 400 
 401         if (!(SDEV_IS_GLOBAL(dv))) {
 402                 dv->sdev_origin = (struct sdev_node *)args;
 403                 dv->sdev_flags &= ~SDEV_PERSIST;
 404         }
 405 
 406         /*
 407          * shadow node is created here OR
 408          * if failed (indicated by dv->sdev_attrvp == NULL),
 409          * created later in sdev_setattr
 410          */
 411         if (avp) {
 412                 dv->sdev_attrvp = avp;
 413         } else {
 414                 if (dv->sdev_attr == NULL) {
 415                         sdev_attr_alloc(dv, vap);
 416                 } else {
 417                         sdev_attr_update(dv, vap);
 418                 }
 419 
 420                 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
 421                         error = sdev_shadow_node(dv, cred);
 422         }
 423 
 424         if (error == 0) {
 425                 /* transition to READY state */
 426                 sdev_set_nodestate(dv, SDEV_READY);
 427                 sdev_nc_node_exists(dv);
 428         }
 429         rw_exit(&dv->sdev_contents);
 430         return (error);
 431 }
 432 
 433 /*
 434  * Build the VROOT sdev_node.
 435  */
 436 /*ARGSUSED*/
 437 struct sdev_node *
 438 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
 439     struct vnode *avp, struct cred *cred)
 440 {
 441         struct sdev_node *dv;
 442         struct vnode *vp;
 443         char devdir[] = "/dev";
 444 
 445         ASSERT(sdev_node_cache != NULL);
 446         ASSERT(avp);
 447         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 448         vp = SDEVTOV(dv);
 449         vn_reinit(vp);
 450         vp->v_flag |= VROOT;
 451         vp->v_vfsp = vfsp;
 452         vp->v_type = VDIR;
 453         vp->v_rdev = devdev;
 454         vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
 455         vn_exists(vp);
 456 
 457         if (vfsp->vfs_mntpt)
 458                 dv->sdev_name = i_ddi_strdup(
 459                     (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
 460         else
 461                 /* vfs_mountdev1 set mount point later */
 462                 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
 463         dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
 464         dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
 465         dv->sdev_ino = SDEV_ROOTINO;
 466         dv->sdev_nlink = 2;          /* name + . (no sdev_insert) */
 467         dv->sdev_dotdot = dv;                /* .. == self */
 468         dv->sdev_attrvp = avp;
 469         dv->sdev_attr = NULL;
 470         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 471         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 472         if (strcmp(dv->sdev_name, "/dev") == 0) {
 473                 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
 474                 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
 475                 dv->sdev_gdir_gen = 0;
 476         } else {
 477                 dv->sdev_flags = SDEV_BUILD;
 478                 dv->sdev_flags &= ~SDEV_PERSIST;
 479                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 480                 dv->sdev_ldir_gen = 0;
 481                 dv->sdev_devtree_gen = 0;
 482         }
 483 
 484         avl_create(&dv->sdev_entries,
 485             (int (*)(const void *, const void *))sdev_compare_nodes,
 486             sizeof (struct sdev_node),
 487             offsetof(struct sdev_node, sdev_avllink));
 488 
 489         rw_enter(&dv->sdev_contents, RW_WRITER);
 490         sdev_set_nodestate(dv, SDEV_READY);
 491         rw_exit(&dv->sdev_contents);
 492         sdev_nc_node_exists(dv);
 493         return (dv);
 494 }
 495 
 496 struct sdev_vop_table vtab[] = {
 497         { "pts", devpts_vnodeops_tbl, &devpts_vnodeops, devpts_validate,
 498         SDEV_DYNAMIC | SDEV_VTOR },
 499 
 500         { "vt", devvt_vnodeops_tbl, &devvt_vnodeops, devvt_validate,
 501         SDEV_DYNAMIC | SDEV_VTOR },
 502 
 503         { "zvol", devzvol_vnodeops_tbl, &devzvol_vnodeops,
 504         devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
 505 
 506         { "zcons", NULL, NULL, NULL, SDEV_NO_NCACHE },
 507 
 508         { "net", devnet_vnodeops_tbl, &devnet_vnodeops, devnet_validate,
 509         SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
 510 
 511         { "ipnet", devipnet_vnodeops_tbl, &devipnet_vnodeops,
 512         devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
 513 
 514         /*
 515          * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
 516          * lofi driver controls child nodes.
 517          *
 518          * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
 519          * stale nodes (e.g. from devfsadm -R).
 520          *
 521          * In addition, devfsadm knows not to attempt a rmdir: a zone
 522          * may hold a reference, which would zombify the node,
 523          * preventing a mkdir.
 524          */
 525 
 526         { "lofi", NULL, NULL, NULL,
 527             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 528         { "rlofi", NULL, NULL, NULL,
 529             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 530 
 531         { NULL, NULL, NULL, NULL, 0}
 532 };
 533 
 534 
 535 /*
 536  * Build the base root inode
 537  */
 538 ino_t
 539 sdev_mkino(struct sdev_node *dv)
 540 {
 541         ino_t   ino;
 542 
 543         /*
 544          * for now, follow the lead of tmpfs here
 545          * need to someday understand the requirements here
 546          */
 547         ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
 548         ino += SDEV_ROOTINO + 1;
 549 
 550         return (ino);
 551 }
 552 
 553 int
 554 sdev_getlink(struct vnode *linkvp, char **link)
 555 {
 556         int err;
 557         char *buf;
 558         struct uio uio = {0};
 559         struct iovec iov = {0};
 560 
 561         if (linkvp == NULL)
 562                 return (ENOENT);
 563         ASSERT(linkvp->v_type == VLNK);
 564 
 565         buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 566         iov.iov_base = buf;
 567         iov.iov_len = MAXPATHLEN;
 568         uio.uio_iov = &iov;
 569         uio.uio_iovcnt = 1;
 570         uio.uio_resid = MAXPATHLEN;
 571         uio.uio_segflg = UIO_SYSSPACE;
 572         uio.uio_llimit = MAXOFFSET_T;
 573 
 574         err = VOP_READLINK(linkvp, &uio, kcred, NULL);
 575         if (err) {
 576                 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
 577                 kmem_free(buf, MAXPATHLEN);
 578                 return (ENOENT);
 579         }
 580 
 581         /* mission complete */
 582         *link = i_ddi_strdup(buf, KM_SLEEP);
 583         kmem_free(buf, MAXPATHLEN);
 584         return (0);
 585 }
 586 
 587 /*
 588  * A convenient wrapper to get the devfs node vnode for a device
 589  * minor functionality: readlink() of a /dev symlink
 590  * Place the link into dv->sdev_symlink
 591  */
 592 static int
 593 sdev_follow_link(struct sdev_node *dv)
 594 {
 595         int err;
 596         struct vnode *linkvp;
 597         char *link = NULL;
 598 
 599         linkvp = SDEVTOV(dv);
 600         if (linkvp == NULL)
 601                 return (ENOENT);
 602         ASSERT(linkvp->v_type == VLNK);
 603         err = sdev_getlink(linkvp, &link);
 604         if (err) {
 605                 dv->sdev_symlink = NULL;
 606                 return (ENOENT);
 607         }
 608 
 609         ASSERT(link != NULL);
 610         dv->sdev_symlink = link;
 611         return (0);
 612 }
 613 
 614 static int
 615 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
 616 {
 617         vtype_t otype = SDEVTOV(dv)->v_type;
 618 
 619         /*
 620          * existing sdev_node has a different type.
 621          */
 622         if (otype != nvap->va_type) {
 623                 sdcmn_err9(("sdev_node_check: existing node "
 624                     "  %s type %d does not match new node type %d\n",
 625                     dv->sdev_name, otype, nvap->va_type));
 626                 return (EEXIST);
 627         }
 628 
 629         /*
 630          * For a symlink, the target should be the same.
 631          */
 632         if (otype == VLNK) {
 633                 ASSERT(nargs != NULL);
 634                 ASSERT(dv->sdev_symlink != NULL);
 635                 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
 636                         sdcmn_err9(("sdev_node_check: existing node "
 637                             " %s has different symlink %s as new node "
 638                             " %s\n", dv->sdev_name, dv->sdev_symlink,
 639                             (char *)nargs));
 640                         return (EEXIST);
 641                 }
 642         }
 643 
 644         return (0);
 645 }
 646 
 647 /*
 648  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 649  *
 650  * arguments:
 651  *      - ddv (parent)
 652  *      - nm (child name)
 653  *      - newdv (sdev_node for nm is returned here)
 654  *      - vap (vattr for the node to be created, va_type should be set.
 655  *      - avp (attribute vnode)
 656  *        the defaults should be used if unknown)
 657  *      - cred
 658  *      - args
 659  *          . tnm (for VLNK)
 660  *          . global sdev_node (for !SDEV_GLOBAL)
 661  *      - state: SDEV_INIT, SDEV_READY
 662  *
 663  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
 664  *
 665  * NOTE:  directory contents writers lock needs to be held before
 666  *        calling this routine.
 667  */
 668 int
 669 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 670     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
 671     sdev_node_state_t state)
 672 {
 673         int error = 0;
 674         sdev_node_state_t node_state;
 675         struct sdev_node *dv = NULL;
 676 
 677         ASSERT(state != SDEV_ZOMBIE);
 678         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
 679 
 680         if (*newdv) {
 681                 dv = *newdv;
 682         } else {
 683                 /* allocate and initialize a sdev_node */
 684                 if (ddv->sdev_state == SDEV_ZOMBIE) {
 685                         sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
 686                             ddv->sdev_path));
 687                         return (ENOENT);
 688                 }
 689 
 690                 error = sdev_nodeinit(ddv, nm, &dv, vap);
 691                 if (error != 0) {
 692                         sdcmn_err9(("sdev_mknode: error %d,"
 693                             " name %s can not be initialized\n",
 694                             error, nm));
 695                         return (error);
 696                 }
 697                 ASSERT(dv);
 698 
 699                 /* insert into the directory cache */
 700                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
 701         }
 702 
 703         ASSERT(dv);
 704         node_state = dv->sdev_state;
 705         ASSERT(node_state != SDEV_ZOMBIE);
 706 
 707         if (state == SDEV_READY) {
 708                 switch (node_state) {
 709                 case SDEV_INIT:
 710                         error = sdev_nodeready(dv, vap, avp, args, cred);
 711                         if (error) {
 712                                 sdcmn_err9(("sdev_mknode: node %s can NOT"
 713                                     " be transitioned into READY state, "
 714                                     "error %d\n", nm, error));
 715                         }
 716                         break;
 717                 case SDEV_READY:
 718                         /*
 719                          * Do some sanity checking to make sure
 720                          * the existing sdev_node is what has been
 721                          * asked for.
 722                          */
 723                         error = sdev_node_check(dv, vap, args);
 724                         break;
 725                 default:
 726                         break;
 727                 }
 728         }
 729 
 730         if (!error) {
 731                 *newdv = dv;
 732                 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
 733         } else {
 734                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
 735                 /*
 736                  * We created this node, it wasn't passed into us. Therefore it
 737                  * is up to us to delete it.
 738                  */
 739                 if (*newdv == NULL)
 740                         SDEV_SIMPLE_RELE(dv);
 741                 *newdv = NULL;
 742         }
 743 
 744         return (error);
 745 }
 746 
 747 /*
 748  * convenient wrapper to change vp's ATIME, CTIME and MTIME
 749  */
 750 void
 751 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
 752 {
 753         struct vattr attr;
 754         timestruc_t now;
 755         int err;
 756 
 757         ASSERT(vp);
 758         gethrestime(&now);
 759         if (mask & AT_CTIME)
 760                 attr.va_ctime = now;
 761         if (mask & AT_MTIME)
 762                 attr.va_mtime = now;
 763         if (mask & AT_ATIME)
 764                 attr.va_atime = now;
 765 
 766         attr.va_mask = (mask & AT_TIMES);
 767         err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
 768         if (err && (err != EROFS)) {
 769                 sdcmn_err(("update timestamps error %d\n", err));
 770         }
 771 }
 772 
 773 /*
 774  * the backing store vnode is released here
 775  */
 776 /*ARGSUSED1*/
 777 void
 778 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
 779 {
 780         /* no references */
 781         ASSERT(dv->sdev_nlink == 0);
 782 
 783         if (dv->sdev_attrvp != NULLVP) {
 784                 VN_RELE(dv->sdev_attrvp);
 785                 /*
 786                  * reset the attrvp so that no more
 787                  * references can be made on this already
 788                  * vn_rele() vnode
 789                  */
 790                 dv->sdev_attrvp = NULLVP;
 791         }
 792 
 793         if (dv->sdev_attr != NULL) {
 794                 kmem_free(dv->sdev_attr, sizeof (struct vattr));
 795                 dv->sdev_attr = NULL;
 796         }
 797 
 798         if (dv->sdev_name != NULL) {
 799                 kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
 800                 dv->sdev_name = NULL;
 801         }
 802 
 803         if (dv->sdev_symlink != NULL) {
 804                 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
 805                 dv->sdev_symlink = NULL;
 806         }
 807 
 808         if (dv->sdev_path) {
 809                 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
 810                 dv->sdev_path = NULL;
 811         }
 812 
 813         if (!SDEV_IS_GLOBAL(dv)) {
 814                 sdev_prof_free(dv);
 815                 if (dv->sdev_vnode->v_type != VLNK && dv->sdev_origin != NULL)
 816                         SDEV_RELE(dv->sdev_origin);
 817         }
 818 
 819         if (SDEVTOV(dv)->v_type == VDIR) {
 820                 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
 821                 avl_destroy(&dv->sdev_entries);
 822         }
 823 
 824         mutex_destroy(&dv->sdev_lookup_lock);
 825         cv_destroy(&dv->sdev_lookup_cv);
 826 
 827         /* return node to initial state as per constructor */
 828         (void) memset((void *)&dv->sdev_instance_data, 0,
 829             sizeof (dv->sdev_instance_data));
 830         vn_invalid(SDEVTOV(dv));
 831         dv->sdev_private = NULL;
 832         kmem_cache_free(sdev_node_cache, dv);
 833 }
 834 
 835 /*
 836  * DIRECTORY CACHE lookup
 837  */
 838 struct sdev_node *
 839 sdev_findbyname(struct sdev_node *ddv, char *nm)
 840 {
 841         struct sdev_node *dv;
 842         struct sdev_node dvtmp;
 843         avl_index_t     where;
 844 
 845         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
 846 
 847         dvtmp.sdev_name = nm;
 848         dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
 849         if (dv) {
 850                 ASSERT(dv->sdev_dotdot == ddv);
 851                 ASSERT(strcmp(dv->sdev_name, nm) == 0);
 852                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
 853                 SDEV_HOLD(dv);
 854                 return (dv);
 855         }
 856         return (NULL);
 857 }
 858 
 859 /*
 860  * Inserts a new sdev_node in a parent directory
 861  */
 862 void
 863 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
 864 {
 865         avl_index_t where;
 866 
 867         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
 868         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
 869         ASSERT(ddv->sdev_nlink >= 2);
 870         ASSERT(dv->sdev_nlink == 0);
 871         ASSERT(dv->sdev_state != SDEV_ZOMBIE);
 872 
 873         dv->sdev_dotdot = ddv;
 874         VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
 875         avl_insert(&ddv->sdev_entries, dv, where);
 876         ddv->sdev_nlink++;
 877 }
 878 
 879 /*
 880  * The following check is needed because while sdev_nodes are linked
 881  * in SDEV_INIT state, they have their link counts incremented only
 882  * in SDEV_READY state.
 883  */
 884 static void
 885 decr_link(struct sdev_node *dv)
 886 {
 887         VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
 888         if (dv->sdev_state != SDEV_INIT) {
 889                 VERIFY(dv->sdev_nlink >= 1);
 890                 dv->sdev_nlink--;
 891         } else {
 892                 VERIFY(dv->sdev_nlink == 0);
 893         }
 894 }
 895 
/*
 * Delete an existing dv from directory cache
 *
 * The node is marked SDEV_ZOMBIE and is always unlinked from its parent
 * here. If it is still held (non-zero reference count), it is not destroyed
 * via sdev_inactive until that reference count reaches "0".
 *
 * The caller must hold ddv->sdev_contents as writer. dv must not already be
 * a zombie.
 */
static void
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
        struct vnode *vp;
        sdev_node_state_t os;

        ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

        vp = SDEVTOV(dv);
        /* the vnode lock is taken first, then the node's contents lock */
        mutex_enter(&vp->v_lock);
        rw_enter(&dv->sdev_contents, RW_WRITER);
        /* remember the previous state; it decides the link fixup below */
        os = dv->sdev_state;
        ASSERT(os != SDEV_ZOMBIE);
        dv->sdev_state = SDEV_ZOMBIE;

        /*
         * unlink ourselves from the parent directory now to take care of the ..
         * link. However, if we're a directory, we don't remove our reference to
         * ourself eg. '.' until we are torn down in the inactive callback.
         */
        decr_link(ddv);
        avl_remove(&ddv->sdev_entries, dv);
        /*
         * sdev_inactive expects nodes to have a link to themselves when we're
         * tearing them down. If we're transitioning from the initial state to
         * zombie and not via ready, then we're not going to have this link that
         * comes from the node being ready. As a result, we need to increment
         * our link count by one to account for this.
         */
        if (os == SDEV_INIT && dv->sdev_nlink == 0)
                dv->sdev_nlink++;
        rw_exit(&dv->sdev_contents);
        mutex_exit(&vp->v_lock);
}
 937 
 938 /*
 939  * check if the source is in the path of the target
 940  *
 941  * source and target are different
 942  */
 943 /*ARGSUSED2*/
 944 static int
 945 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
 946 {
 947         int error = 0;
 948         struct sdev_node *dotdot, *dir;
 949 
 950         dotdot = tdv->sdev_dotdot;
 951         ASSERT(dotdot);
 952 
 953         /* fs root */
 954         if (dotdot == tdv) {
 955                 return (0);
 956         }
 957 
 958         for (;;) {
 959                 /*
 960                  * avoid error cases like
 961                  *      mv a a/b
 962                  *      mv a a/b/c
 963                  *      etc.
 964                  */
 965                 if (dotdot == sdv) {
 966                         error = EINVAL;
 967                         break;
 968                 }
 969 
 970                 dir = dotdot;
 971                 dotdot = dir->sdev_dotdot;
 972 
 973                 /* done checking because root is reached */
 974                 if (dir == dotdot) {
 975                         break;
 976                 }
 977         }
 978         return (error);
 979 }
 980 
 981 int
 982 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
 983     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
 984     struct cred *cred)
 985 {
 986         int error = 0;
 987         struct vnode *ovp = SDEVTOV(odv);
 988         struct vnode *nvp;
 989         struct vattr vattr;
 990         int doingdir = (ovp->v_type == VDIR);
 991         char *link = NULL;
 992         int samedir = (oddv == nddv) ? 1 : 0;
 993         int bkstore = 0;
 994         struct sdev_node *idv = NULL;
 995         struct sdev_node *ndv = NULL;
 996         timestruc_t now;
 997 
 998         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
 999         error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1000         if (error)
1001                 return (error);
1002 
1003         if (!samedir)
1004                 rw_enter(&oddv->sdev_contents, RW_WRITER);
1005         rw_enter(&nddv->sdev_contents, RW_WRITER);
1006 
1007         /*
1008          * the source may have been deleted by another thread before
1009          * we gets here.
1010          */
1011         if (odv->sdev_state != SDEV_READY) {
1012                 error = ENOENT;
1013                 goto err_out;
1014         }
1015 
1016         if (doingdir && (odv == nddv)) {
1017                 error = EINVAL;
1018                 goto err_out;
1019         }
1020 
1021         /*
1022          * If renaming a directory, and the parents are different (".." must be
1023          * changed) then the source dir must not be in the dir hierarchy above
1024          * the target since it would orphan everything below the source dir.
1025          */
1026         if (doingdir && (oddv != nddv)) {
1027                 error = sdev_checkpath(odv, nddv, cred);
1028                 if (error)
1029                         goto err_out;
1030         }
1031 
1032         /* fix the source for a symlink */
1033         if (vattr.va_type == VLNK) {
1034                 if (odv->sdev_symlink == NULL) {
1035                         error = sdev_follow_link(odv);
1036                         if (error) {
1037                                 /*
1038                                  * The underlying symlink doesn't exist. This
1039                                  * node probably shouldn't even exist. While
1040                                  * it's a bit jarring to consumers, we're going
1041                                  * to remove the node from /dev.
1042                                  */
1043                                 if (SDEV_IS_PERSIST((*ndvp)))
1044                                         bkstore = 1;
1045                                 sdev_dirdelete(oddv, odv);
1046                                 if (bkstore) {
1047                                         ASSERT(nddv->sdev_attrvp);
1048                                         error = VOP_REMOVE(nddv->sdev_attrvp,
1049                                             nnm, cred, NULL, 0);
1050                                         if (error)
1051                                                 goto err_out;
1052                                 }
1053                                 error = ENOENT;
1054                                 goto err_out;
1055                         }
1056                 }
1057                 ASSERT(odv->sdev_symlink);
1058                 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1059         }
1060 
1061         /* destination existing */
1062         if (*ndvp) {
1063                 nvp = SDEVTOV(*ndvp);
1064                 ASSERT(nvp);
1065 
1066                 /* handling renaming to itself */
1067                 if (odv == *ndvp) {
1068                         error = 0;
1069                         goto err_out;
1070                 }
1071 
1072                 if (nvp->v_type == VDIR) {
1073                         if (!doingdir) {
1074                                 error = EISDIR;
1075                                 goto err_out;
1076                         }
1077 
1078                         if (vn_vfswlock(nvp)) {
1079                                 error = EBUSY;
1080                                 goto err_out;
1081                         }
1082 
1083                         if (vn_mountedvfs(nvp) != NULL) {
1084                                 vn_vfsunlock(nvp);
1085                                 error = EBUSY;
1086                                 goto err_out;
1087                         }
1088 
1089                         /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1090                         if ((*ndvp)->sdev_nlink > 2) {
1091                                 vn_vfsunlock(nvp);
1092                                 error = EEXIST;
1093                                 goto err_out;
1094                         }
1095                         vn_vfsunlock(nvp);
1096 
1097                         /*
1098                          * We did not place the hold on *ndvp, so even though
1099                          * we're deleting the node, we should not get rid of our
1100                          * reference.
1101                          */
1102                         sdev_dirdelete(nddv, *ndvp);
1103                         *ndvp = NULL;
1104                         ASSERT(nddv->sdev_attrvp);
1105                         error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1106                             nddv->sdev_attrvp, cred, NULL, 0);
1107                         if (error)
1108                                 goto err_out;
1109                 } else {
1110                         if (doingdir) {
1111                                 error = ENOTDIR;
1112                                 goto err_out;
1113                         }
1114 
1115                         if (SDEV_IS_PERSIST((*ndvp))) {
1116                                 bkstore = 1;
1117                         }
1118 
1119                         /*
1120                          * Get rid of the node from the directory cache note.
1121                          * Don't forget that it's not up to us to remove the vn
1122                          * ref on the sdev node, as we did not place it.
1123                          */
1124                         sdev_dirdelete(nddv, *ndvp);
1125                         *ndvp = NULL;
1126                         if (bkstore) {
1127                                 ASSERT(nddv->sdev_attrvp);
1128                                 error = VOP_REMOVE(nddv->sdev_attrvp,
1129                                     nnm, cred, NULL, 0);
1130                                 if (error)
1131                                         goto err_out;
1132                         }
1133                 }
1134         }
1135 
1136         /*
1137          * make a fresh node from the source attrs
1138          */
1139         ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1140         error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1141             NULL, (void *)link, cred, SDEV_READY);
1142 
1143         if (link != NULL) {
1144                 kmem_free(link, strlen(link) + 1);
1145                 link = NULL;
1146         }
1147 
1148         if (error)
1149                 goto err_out;
1150         ASSERT(*ndvp);
1151         ASSERT((*ndvp)->sdev_state == SDEV_READY);
1152 
1153         /* move dir contents */
1154         if (doingdir) {
1155                 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1156                     idv = SDEV_NEXT_ENTRY(odv, idv)) {
1157                         SDEV_HOLD(idv);
1158                         error = sdev_rnmnode(odv, idv,
1159                             (struct sdev_node *)(*ndvp), &ndv,
1160                             idv->sdev_name, cred);
1161                         SDEV_RELE(idv);
1162                         if (error)
1163                                 goto err_out;
1164                         ndv = NULL;
1165                 }
1166         }
1167 
1168         if ((*ndvp)->sdev_attrvp) {
1169                 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1170                     AT_CTIME|AT_ATIME);
1171         } else {
1172                 ASSERT((*ndvp)->sdev_attr);
1173                 gethrestime(&now);
1174                 (*ndvp)->sdev_attr->va_ctime = now;
1175                 (*ndvp)->sdev_attr->va_atime = now;
1176         }
1177 
1178         if (nddv->sdev_attrvp) {
1179                 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1180                     AT_MTIME|AT_ATIME);
1181         } else {
1182                 ASSERT(nddv->sdev_attr);
1183                 gethrestime(&now);
1184                 nddv->sdev_attr->va_mtime = now;
1185                 nddv->sdev_attr->va_atime = now;
1186         }
1187         rw_exit(&nddv->sdev_contents);
1188         if (!samedir)
1189                 rw_exit(&oddv->sdev_contents);
1190 
1191         SDEV_RELE(*ndvp);
1192         return (error);
1193 
1194 err_out:
1195         if (link != NULL) {
1196                 kmem_free(link, strlen(link) + 1);
1197                 link = NULL;
1198         }
1199 
1200         rw_exit(&nddv->sdev_contents);
1201         if (!samedir)
1202                 rw_exit(&oddv->sdev_contents);
1203         return (error);
1204 }
1205 
1206 /*
1207  * Merge sdev_node specific information into an attribute structure.
1208  *
1209  * note: sdev_node is not locked here
1210  */
1211 void
1212 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1213 {
1214         struct vnode *vp = SDEVTOV(dv);
1215 
1216         vap->va_nlink = dv->sdev_nlink;
1217         vap->va_nodeid = dv->sdev_ino;
1218         vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1219         vap->va_type = vp->v_type;
1220 
1221         if (vp->v_type == VDIR) {
1222                 vap->va_rdev = 0;
1223                 vap->va_fsid = vp->v_rdev;
1224         } else if (vp->v_type == VLNK) {
1225                 vap->va_rdev = 0;
1226                 vap->va_mode  &= ~S_IFMT;
1227                 vap->va_mode |= S_IFLNK;
1228         } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1229                 vap->va_rdev = vp->v_rdev;
1230                 vap->va_mode &= ~S_IFMT;
1231                 if (vap->va_type == VCHR)
1232                         vap->va_mode |= S_IFCHR;
1233                 else
1234                         vap->va_mode |= S_IFBLK;
1235         } else {
1236                 vap->va_rdev = 0;
1237         }
1238 }
1239 
1240 struct vattr *
1241 sdev_getdefault_attr(enum vtype type)
1242 {
1243         if (type == VDIR)
1244                 return (&sdev_vattr_dir);
1245         else if (type == VCHR)
1246                 return (&sdev_vattr_chr);
1247         else if (type == VBLK)
1248                 return (&sdev_vattr_blk);
1249         else if (type == VLNK)
1250                 return (&sdev_vattr_lnk);
1251         else
1252                 return (NULL);
1253 }
1254 int
1255 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1256 {
1257         int rv = 0;
1258         struct vnode *vp = SDEVTOV(dv);
1259 
1260         switch (vp->v_type) {
1261         case VCHR:
1262         case VBLK:
1263                 /*
1264                  * If vnode is a device, return special vnode instead
1265                  * (though it knows all about -us- via sp->s_realvp)
1266                  */
1267                 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1268                 VN_RELE(vp);
1269                 if (*vpp == NULLVP)
1270                         rv = ENOSYS;
1271                 break;
1272         default:        /* most types are returned as is */
1273                 *vpp = vp;
1274                 break;
1275         }
1276         return (rv);
1277 }
1278 
1279 /*
1280  * junction between devname and root file system, e.g. ufs
1281  */
1282 int
1283 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1284 {
1285         struct vnode *rdvp = ddv->sdev_attrvp;
1286         int rval = 0;
1287 
1288         ASSERT(rdvp);
1289 
1290         rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1291             NULL);
1292         return (rval);
1293 }
1294 
1295 static int
1296 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1297 {
1298         struct sdev_node *dv = NULL;
1299         char    *nm;
1300         struct vnode *dirvp;
1301         int     error;
1302         vnode_t *vp;
1303         int eof;
1304         struct iovec iov;
1305         struct uio uio;
1306         struct dirent64 *dp;
1307         dirent64_t *dbuf;
1308         size_t dbuflen;
1309         struct vattr vattr;
1310         char *link = NULL;
1311 
1312         if (ddv->sdev_attrvp == NULL)
1313                 return (0);
1314         if (!(ddv->sdev_flags & SDEV_BUILD))
1315                 return (0);
1316 
1317         dirvp = ddv->sdev_attrvp;
1318         VN_HOLD(dirvp);
1319         dbuf = kmem_zalloc(dlen, KM_SLEEP);
1320 
1321         uio.uio_iov = &iov;
1322         uio.uio_iovcnt = 1;
1323         uio.uio_segflg = UIO_SYSSPACE;
1324         uio.uio_fmode = 0;
1325         uio.uio_extflg = UIO_COPY_CACHED;
1326         uio.uio_loffset = 0;
1327         uio.uio_llimit = MAXOFFSET_T;
1328 
1329         eof = 0;
1330         error = 0;
1331         while (!error && !eof) {
1332                 uio.uio_resid = dlen;
1333                 iov.iov_base = (char *)dbuf;
1334                 iov.iov_len = dlen;
1335                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1336                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1337                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1338 
1339                 dbuflen = dlen - uio.uio_resid;
1340                 if (error || dbuflen == 0)
1341                         break;
1342 
1343                 if (!(ddv->sdev_flags & SDEV_BUILD))
1344                         break;
1345 
1346                 for (dp = dbuf; ((intptr_t)dp <
1347                     (intptr_t)dbuf + dbuflen);
1348                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1349                         nm = dp->d_name;
1350 
1351                         if (strcmp(nm, ".") == 0 ||
1352                             strcmp(nm, "..") == 0)
1353                                 continue;
1354 
1355                         vp = NULLVP;
1356                         dv = sdev_cache_lookup(ddv, nm);
1357                         if (dv) {
1358                                 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1359                                 SDEV_SIMPLE_RELE(dv);
1360                                 continue;
1361                         }
1362 
1363                         /* refill the cache if not already */
1364                         error = devname_backstore_lookup(ddv, nm, &vp);
1365                         if (error)
1366                                 continue;
1367 
1368                         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1369                         error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1370                         if (error)
1371                                 continue;
1372 
1373                         if (vattr.va_type == VLNK) {
1374                                 error = sdev_getlink(vp, &link);
1375                                 if (error) {
1376                                         continue;
1377                                 }
1378                                 ASSERT(link != NULL);
1379                         }
1380 
1381                         if (!rw_tryupgrade(&ddv->sdev_contents)) {
1382                                 rw_exit(&ddv->sdev_contents);
1383                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
1384                         }
1385                         error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1386                             cred, SDEV_READY);
1387                         rw_downgrade(&ddv->sdev_contents);
1388 
1389                         if (link != NULL) {
1390                                 kmem_free(link, strlen(link) + 1);
1391                                 link = NULL;
1392                         }
1393 
1394                         if (!error) {
1395                                 ASSERT(dv);
1396                                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1397                                 SDEV_SIMPLE_RELE(dv);
1398                         }
1399                         vp = NULL;
1400                         dv = NULL;
1401                 }
1402         }
1403 
1404 done:
1405         VN_RELE(dirvp);
1406         kmem_free(dbuf, dlen);
1407 
1408         return (error);
1409 }
1410 
1411 void
1412 sdev_filldir_dynamic(struct sdev_node *ddv)
1413 {
1414         int error;
1415         int i;
1416         struct vattr vattr;
1417         struct vattr *vap = &vattr;
1418         char *nm = NULL;
1419         struct sdev_node *dv = NULL;
1420 
1421         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1422         ASSERT((ddv->sdev_flags & SDEV_BUILD));
1423 
1424         *vap = *sdev_getdefault_attr(VDIR);     /* note structure copy here */
1425         gethrestime(&vap->va_atime);
1426         vap->va_mtime = vap->va_atime;
1427         vap->va_ctime = vap->va_atime;
1428         for (i = 0; vtab[i].vt_name != NULL; i++) {
1429                 /*
1430                  * This early, we may be in a read-only /dev environment: leave
1431                  * the creation of any nodes we'd attempt to persist to
1432                  * devfsadm. Because /dev itself is normally persistent, any
1433                  * node which is not marked dynamic will end up being marked
1434                  * persistent. However, some nodes are both dynamic and
1435                  * persistent, mostly lofi and rlofi, so we need to be careful
1436                  * in our check.
1437                  */
1438                 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1439                     !(vtab[i].vt_flags & SDEV_DYNAMIC))
1440                         continue;
1441                 nm = vtab[i].vt_name;
1442                 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1443                 dv = NULL;
1444                 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1445                     NULL, kcred, SDEV_READY);
1446                 if (error) {
1447                         cmn_err(CE_WARN, "%s/%s: error %d\n",
1448                             ddv->sdev_name, nm, error);
1449                 } else {
1450                         ASSERT(dv);
1451                         ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1452                         SDEV_SIMPLE_RELE(dv);
1453                 }
1454         }
1455 }
1456 
1457 /*
1458  * Creating a backing store entry based on sdev_attr.
1459  * This is called either as part of node creation in a persistent directory
1460  * or from setattr/setsecattr to persist access attributes across reboot.
1461  */
1462 int
1463 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1464 {
1465         int error = 0;
1466         struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1467         struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1468         struct vattr *vap = dv->sdev_attr;
1469         char *nm = dv->sdev_name;
1470         struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1471 
1472         ASSERT(dv && dv->sdev_name && rdvp);
1473         ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1474 
1475 lookup:
1476         /* try to find it in the backing store */
1477         error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1478             NULL);
1479         if (error == 0) {
1480                 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1481                         VN_HOLD(rrvp);
1482                         VN_RELE(*rvp);
1483                         *rvp = rrvp;
1484                 }
1485 
1486                 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1487                 dv->sdev_attr = NULL;
1488                 dv->sdev_attrvp = *rvp;
1489                 return (0);
1490         }
1491 
1492         /* let's try to persist the node */
1493         gethrestime(&vap->va_atime);
1494         vap->va_mtime = vap->va_atime;
1495         vap->va_ctime = vap->va_atime;
1496         vap->va_mask |= AT_TYPE|AT_MODE;
1497         switch (vap->va_type) {
1498         case VDIR:
1499                 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1500                 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1501                     (void *)(*rvp), error));
1502                 if (!error)
1503                         VN_RELE(*rvp);
1504                 break;
1505         case VCHR:
1506         case VBLK:
1507         case VREG:
1508         case VDOOR:
1509                 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1510                     rvp, cred, 0, NULL, NULL);
1511                 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1512                     (void *)(*rvp), error));
1513                 if (!error)
1514                         VN_RELE(*rvp);
1515                 break;
1516         case VLNK:
1517                 ASSERT(dv->sdev_symlink);
1518                 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1519                     NULL, 0);
1520                 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1521                     error));
1522                 break;
1523         default:
1524                 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1525                     "create\n", nm);
1526                 /*NOTREACHED*/
1527         }
1528 
1529         /* go back to lookup to factor out spec node and set attrvp */
1530         if (error == 0)
1531                 goto lookup;
1532 
1533         sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1534         return (error);
1535 }
1536 
1537 static void
1538 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1539 {
1540         struct sdev_node *dup = NULL;
1541 
1542         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1543         if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1544                 sdev_direnter(ddv, *dv);
1545         } else {
1546                 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1547                 SDEV_SIMPLE_RELE(*dv);
1548                 sdev_nodedestroy(*dv, 0);
1549                 *dv = dup;
1550         }
1551 }
1552 
/*
 * Remove *dv from the cache of directory ddv by handing it to
 * sdev_dirdelete. The caller must hold ddv->sdev_contents as writer; *dv
 * itself is left unchanged.
 */
static void
sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
{
        ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
        sdev_dirdelete(ddv, *dv);
}
1559 
1560 /*
1561  * update the in-core directory cache
1562  */
1563 void
1564 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1565     sdev_cache_ops_t ops)
1566 {
1567         ASSERT((SDEV_HELD(*dv)));
1568 
1569         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1570         switch (ops) {
1571         case SDEV_CACHE_ADD:
1572                 sdev_cache_add(ddv, dv, nm);
1573                 break;
1574         case SDEV_CACHE_DELETE:
1575                 sdev_cache_delete(ddv, dv);
1576                 break;
1577         default:
1578                 break;
1579         }
1580 }
1581 
1582 /*
1583  * retrieve the named entry from the directory cache
1584  */
1585 struct sdev_node *
1586 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1587 {
1588         struct sdev_node *dv = NULL;
1589 
1590         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1591         dv = sdev_findbyname(ddv, nm);
1592 
1593         return (dv);
1594 }
1595 
1596 /*
1597  * Implicit reconfig for nodes constructed by a link generator
1598  * Start devfsadm if needed, or if devfsadm is in progress,
1599  * prepare to block on devfsadm either completing or
1600  * constructing the desired node.  As devfsadmd is global
1601  * in scope, constructing all necessary nodes, we only
1602  * need to initiate it once.
1603  */
1604 static int
1605 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1606 {
1607         int error = 0;
1608 
1609         if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1610                 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1611                     ddv->sdev_name, nm, devfsadm_state));
1612                 mutex_enter(&dv->sdev_lookup_lock);
1613                 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1614                 mutex_exit(&dv->sdev_lookup_lock);
1615                 error = 0;
1616         } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1617                 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1618                     ddv->sdev_name, nm, devfsadm_state));
1619 
1620                 sdev_devfsadmd_thread(ddv, dv, kcred);
1621                 mutex_enter(&dv->sdev_lookup_lock);
1622                 SDEV_BLOCK_OTHERS(dv,
1623                     (SDEV_LOOKUP | SDEV_LGWAITING));
1624                 mutex_exit(&dv->sdev_lookup_lock);
1625                 error = 0;
1626         } else {
1627                 error = -1;
1628         }
1629 
1630         return (error);
1631 }
1632 
1633 /*
1634  *  Support for specialized device naming construction mechanisms
1635  */
1636 static int
1637 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1638     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1639     void *, char *), int flags, struct cred *cred)
1640 {
1641         int rv = 0;
1642         char *physpath = NULL;
1643         struct vattr vattr;
1644         struct vattr *vap = &vattr;
1645         struct sdev_node *dv = NULL;
1646 
1647         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1648         if (flags & SDEV_VLINK) {
1649                 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1650                 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1651                     NULL);
1652                 if (rv) {
1653                         kmem_free(physpath, MAXPATHLEN);
1654                         return (-1);
1655                 }
1656 
1657                 *vap = *sdev_getdefault_attr(VLNK);     /* structure copy */
1658                 vap->va_size = strlen(physpath);
1659                 gethrestime(&vap->va_atime);
1660                 vap->va_mtime = vap->va_atime;
1661                 vap->va_ctime = vap->va_atime;
1662 
1663                 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1664                     (void *)physpath, cred, SDEV_READY);
1665                 kmem_free(physpath, MAXPATHLEN);
1666                 if (rv)
1667                         return (rv);
1668         } else if (flags & SDEV_VATTR) {
1669                 /*
1670                  * /dev/pts
1671                  *
1672                  * callback is responsible to set the basic attributes,
1673                  * e.g. va_type/va_uid/va_gid/
1674                  *    dev_t if VCHR or VBLK/
1675                  */
1676                 ASSERT(callback);
1677                 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1678                 if (rv) {
1679                         sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1680                             "callback failed \n"));
1681                         return (-1);
1682                 }
1683 
1684                 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1685                     cred, SDEV_READY);
1686 
1687                 if (rv)
1688                         return (rv);
1689 
1690         } else {
1691                 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1692                     SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1693                     __LINE__));
1694                 rv = -1;
1695         }
1696 
1697         *dvp = dv;
1698         return (rv);
1699 }
1700 
/*
 * Report whether exec_name is exactly "devfsadm": 1 if so, 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
        return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
1713 
/*
 * Look up the name nm in the /dev directory node ddv and hand the result
 * back through vpp.
 *
 * Lookup Order:
 *      sdev_node cache;
 *      backing store (SDEV_PERSIST);
 *      DBNR: a. dir_ops implemented in the loadable modules;
 *            b. vnode ops in vtab.
 *
 * Arguments:
 *      ddv       directory node to search; must be a VDIR
 *      nm        component name to look up
 *      vpp       out: the resulting vnode, or NULL on the failure
 *                paths that store to it
 *      cred      credentials passed to VOP_GETATTR() and sdev_mknode()
 *      callback  optional directory callback; when non-NULL it is handed,
 *                together with flags, to sdev_call_dircallback()
 *      flags     passed through to sdev_call_dircallback()
 *
 * Returns:
 *      ENOTDIR   ddv is not a directory
 *      0         for "", "." and ".." (the vnode is held with VN_HOLD)
 *      the value of sdev_to_vp() once a READY node has been found
 *      ENOENT    on every other path
 *
 * Locking: ddv->sdev_contents is taken here as reader and is dropped
 * again on every return path.  It is upgraded to writer around node
 * creation and deletion; when rw_tryupgrade() fails the lock is dropped
 * and re-taken, so directory state may change across those points.
 */
int
devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
    struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
    struct cred *, void *, char *), int flags)
{
        int rv = 0, nmlen;
        struct vnode *rvp = NULL;
        struct sdev_node *dv = NULL;
        /* set to 1 once devfsadmd has been invoked and the lookup restarted */
        int     retried = 0;
        int     error = 0;
        struct vattr vattr;
        /* name of the calling process; compared against "devfsadm" below */
        char *lookup_thread = curproc->p_user.u_comm;
        /* SLF_* bits accumulated for sdev_lookup_failed() */
        int failed_flags = 0;
        /* optional per-directory node validator, see SDEV_VTOR below */
        int (*vtor)(struct sdev_node *) = NULL;
        int state;
        int parent_state;
        char *link = NULL;

        if (SDEVTOV(ddv)->v_type != VDIR)
                return (ENOTDIR);

        /*
         * Empty name or ., return node itself.
         */
        nmlen = strlen(nm);
        if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
                *vpp = SDEVTOV(ddv);
                VN_HOLD(*vpp);
                return (0);
        }

        /*
         * .., return the parent directory
         */
        if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
                *vpp = SDEVTOV(ddv->sdev_dotdot);
                VN_HOLD(*vpp);
                return (0);
        }

        rw_enter(&ddv->sdev_contents, RW_READER);
        if (ddv->sdev_flags & SDEV_VTOR) {
                vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
                ASSERT(vtor);
        }

tryagain:
        /*
         * (a) directory cache lookup:
         */
        ASSERT(RW_READ_HELD(&ddv->sdev_contents));
        parent_state = ddv->sdev_state;
        dv = sdev_cache_lookup(ddv, nm);
        if (dv) {
                state = dv->sdev_state;
                switch (state) {
                case SDEV_INIT:
                        /* devfsadm itself never waits on an INIT node */
                        if (is_devfsadm_thread(lookup_thread))
                                break;

                        /* ZOMBIED parent won't allow node creation */
                        if (parent_state == SDEV_ZOMBIE) {
                                SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                    retried);
                                goto nolock_notfound;
                        }

                        mutex_enter(&dv->sdev_lookup_lock);
                        /* compensate the threads started after devfsadm */
                        if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
                            !(SDEV_IS_LOOKUP(dv)))
                                SDEV_BLOCK_OTHERS(dv,
                                    (SDEV_LOOKUP | SDEV_LGWAITING));

                        if (SDEV_IS_LOOKUP(dv)) {
                                failed_flags |= SLF_REBUILT;
                                /*
                                 * The directory lock is dropped across the
                                 * wait and re-taken afterwards; the node
                                 * state is re-read once it is held again.
                                 */
                                rw_exit(&ddv->sdev_contents);
                                error = sdev_wait4lookup(dv, SDEV_LOOKUP);
                                mutex_exit(&dv->sdev_lookup_lock);
                                rw_enter(&ddv->sdev_contents, RW_READER);

                                if (error != 0) {
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        goto nolock_notfound;
                                }

                                state = dv->sdev_state;
                                if (state == SDEV_INIT) {
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        goto nolock_notfound;
                                } else if (state == SDEV_READY) {
                                        goto found;
                                } else if (state == SDEV_ZOMBIE) {
                                        rw_exit(&ddv->sdev_contents);
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        SDEV_RELE(dv);
                                        goto lookup_failed;
                                }
                        } else {
                                mutex_exit(&dv->sdev_lookup_lock);
                        }
                        break;
                case SDEV_READY:
                        goto found;
                case SDEV_ZOMBIE:
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        SDEV_RELE(dv);
                        goto lookup_failed;
                default:
                        /*
                         * NOTE(review): unlike the SDEV_ZOMBIE case above,
                         * this path returns without SDEV_RELE(dv).  If
                         * sdev_cache_lookup() returns a held node (as the
                         * other exits suggest), the hold is not dropped
                         * here - confirm whether that is intended.
                         */
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        *vpp = NULLVP;
                        return (ENOENT);
                }
        }
        ASSERT(RW_READ_HELD(&ddv->sdev_contents));

        /*
         * ZOMBIED parent does not allow new node creation.
         * bail out early
         *
         * NOTE(review): dv can be non-NULL here when the devfsadm thread
         * found an SDEV_INIT node above; this return neither releases dv
         * nor calls sdev_lookup_failed() - confirm against the hold rules.
         */
        if (parent_state == SDEV_ZOMBIE) {
                rw_exit(&ddv->sdev_contents);
                *vpp = NULLVP;
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                return (ENOENT);
        }

        /*
         * (b0): backing store lookup
         *      SDEV_PERSIST is default except:
         *              1) pts nodes
         *              2) non-chmod'ed local nodes
         *              3) zvol nodes
         */
        if (SDEV_IS_PERSIST(ddv)) {
                error = devname_backstore_lookup(ddv, nm, &rvp);

                if (!error) {

                        /* fetch the attributes used to build the new node */
                        vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
                        error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
                        if (error) {
                                rw_exit(&ddv->sdev_contents);
                                if (dv)
                                        SDEV_RELE(dv);
                                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                                sdev_lookup_failed(ddv, nm, failed_flags);
                                *vpp = NULLVP;
                                return (ENOENT);
                        }

                        /* symlinks also need their target string */
                        if (vattr.va_type == VLNK) {
                                error = sdev_getlink(rvp, &link);
                                if (error) {
                                        rw_exit(&ddv->sdev_contents);
                                        if (dv)
                                                SDEV_RELE(dv);
                                        SD_TRACE_FAILED_LOOKUP(ddv, nm,
                                            retried);
                                        sdev_lookup_failed(ddv, nm,
                                            failed_flags);
                                        *vpp = NULLVP;
                                        return (ENOENT);
                                }
                                ASSERT(link != NULL);
                        }

                        /*
                         * sdev_mknode() is called with the contents lock
                         * held as writer; fall back to drop-and-retake if
                         * the in-place upgrade is refused.
                         */
                        if (!rw_tryupgrade(&ddv->sdev_contents)) {
                                rw_exit(&ddv->sdev_contents);
                                rw_enter(&ddv->sdev_contents, RW_WRITER);
                        }
                        error = sdev_mknode(ddv, nm, &dv, &vattr,
                            rvp, link, cred, SDEV_READY);
                        rw_downgrade(&ddv->sdev_contents);

                        /*
                         * The link string is freed by its length + 1;
                         * presumably that matches how sdev_getlink()
                         * allocated it - TODO confirm.
                         */
                        if (link != NULL) {
                                kmem_free(link, strlen(link) + 1);
                                link = NULL;
                        }

                        if (error) {
                                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                                rw_exit(&ddv->sdev_contents);
                                if (dv)
                                        SDEV_RELE(dv);
                                goto lookup_failed;
                        } else {
                                goto found;
                        }
                } else if (retried) {
                        /*
                         * devfsadmd has already been invoked once for this
                         * lookup and the backing store still has no entry:
                         * give up.
                         */
                        rw_exit(&ddv->sdev_contents);
                        sdcmn_err3(("retry of lookup of %s/%s: failed\n",
                            ddv->sdev_name, nm));
                        if (dv)
                                SDEV_RELE(dv);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        *vpp = NULLVP;
                        return (ENOENT);
                }
        }

lookup_create_node:
        /* first thread that is doing the lookup on this node */
        if (callback) {
                ASSERT(dv == NULL);
                if (!rw_tryupgrade(&ddv->sdev_contents)) {
                        rw_exit(&ddv->sdev_contents);
                        rw_enter(&ddv->sdev_contents, RW_WRITER);
                }
                error = sdev_call_dircallback(ddv, &dv, nm, callback,
                    flags, cred);
                rw_downgrade(&ddv->sdev_contents);
                if (error == 0) {
                        goto found;
                } else {
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        rw_exit(&ddv->sdev_contents);
                        goto lookup_failed;
                }
        }
        if (!dv) {
                /*
                 * No callback and nothing cached: create a placeholder
                 * node in the SDEV_INIT state.
                 */
                if (!rw_tryupgrade(&ddv->sdev_contents)) {
                        rw_exit(&ddv->sdev_contents);
                        rw_enter(&ddv->sdev_contents, RW_WRITER);
                }
                error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
                    cred, SDEV_INIT);
                /* success is judged by dv being set, not by error */
                if (!dv) {
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        *vpp = NULLVP;
                        return (ENOENT);
                }
                rw_downgrade(&ddv->sdev_contents);
        }

        /*
         * (b1) invoking devfsadm once per life time for devfsadm nodes
         */
        ASSERT(SDEV_HELD(dv));

        if (SDEV_IS_NO_NCACHE(dv))
                failed_flags |= SLF_NO_NCACHE;
        /* conditions under which devfsadmd is not invoked for this lookup */
        if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
            SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
            ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
                ASSERT(SDEV_HELD(dv));
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        /*
         * filter out known non-existent devices recorded
         * during initial reconfiguration boot for which
         * reconfig should not be done and lookup may
         * be short-circuited now.
         */
        if (sdev_lookup_filter(ddv, nm)) {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        /* bypassing devfsadm internal nodes */
        if (is_devfsadm_thread(lookup_thread)) {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        if (sdev_reconfig_disable) {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

        error = sdev_call_devfsadmd(ddv, dv, nm);
        if (error == 0) {
                sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
                    ddv->sdev_name, nm, curproc->p_user.u_comm));
                if (sdev_reconfig_verbose) {
                        cmn_err(CE_CONT,
                            "?lookup of %s/%s by %s: reconfig\n",
                            ddv->sdev_name, nm, curproc->p_user.u_comm);
                }
                /*
                 * Restart from the cache lookup; the hold on dv is
                 * dropped first because tryagain looks the node up again.
                 */
                retried = 1;
                failed_flags |= SLF_REBUILT;
                ASSERT(dv->sdev_state != SDEV_ZOMBIE);
                SDEV_SIMPLE_RELE(dv);
                goto tryagain;
        } else {
                SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                goto nolock_notfound;
        }

found:
        /* reached with the contents lock held and dv in the READY state */
        ASSERT(dv->sdev_state == SDEV_READY);
        if (vtor) {
                /*
                 * Check validity of returned node
                 */
                switch (vtor(dv)) {
                case SDEV_VTOR_VALID:
                        break;
                case SDEV_VTOR_STALE:
                        /*
                         * The name exists, but the cache entry is
                         * stale and needs to be re-created.
                         */
                        ASSERT(RW_READ_HELD(&ddv->sdev_contents));
                        if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
                                rw_exit(&ddv->sdev_contents);
                                rw_enter(&ddv->sdev_contents, RW_WRITER);
                        }
                        sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
                        rw_downgrade(&ddv->sdev_contents);
                        SDEV_RELE(dv);
                        dv = NULL;
                        goto lookup_create_node;
                        /* FALLTHRU */
                case SDEV_VTOR_INVALID:
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        sdcmn_err7(("lookup: destroy invalid "
                            "node: %s(%p)\n", dv->sdev_name, (void *)dv));
                        goto nolock_notfound;
                case SDEV_VTOR_SKIP:
                        sdcmn_err7(("lookup: node not applicable - "
                            "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
                        rw_exit(&ddv->sdev_contents);
                        SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
                        SDEV_RELE(dv);
                        goto lookup_failed;
                default:
                        cmn_err(CE_PANIC,
                            "dev fs: validator failed: %s(%p)\n",
                            dv->sdev_name, (void *)dv);
                        break;
                }
        }

        /* success: drop the directory lock and convert the node to a vnode */
        rw_exit(&ddv->sdev_contents);
        rv = sdev_to_vp(dv, vpp);
        sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
            "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
            dv->sdev_state, nm, rv));
        return (rv);

nolock_notfound:
        /*
         * Destroy the node that is created for synchronization purposes.
         *
         * Despite the label name, this is entered with the contents lock
         * held as reader (asserted below); dv is released on every path
         * out of here.
         */
        sdcmn_err3(("devname_lookup_func: %s with state %d\n",
            nm, dv->sdev_state));
        ASSERT(RW_READ_HELD(&ddv->sdev_contents));
        if (dv->sdev_state == SDEV_INIT) {
                if (!rw_tryupgrade(&ddv->sdev_contents)) {
                        rw_exit(&ddv->sdev_contents);
                        rw_enter(&ddv->sdev_contents, RW_WRITER);
                }

                /*
                 * Node state may have changed during the lock
                 * changes. Re-check.
                 */
                if (dv->sdev_state == SDEV_INIT) {
                        sdev_dirdelete(ddv, dv);
                        rw_exit(&ddv->sdev_contents);
                        sdev_lookup_failed(ddv, nm, failed_flags);
                        SDEV_RELE(dv);
                        *vpp = NULL;
                        return (ENOENT);
                }
        }

        rw_exit(&ddv->sdev_contents);
        SDEV_RELE(dv);

lookup_failed:
        /* every jump to this label has already dropped the contents lock */
        sdev_lookup_failed(ddv, nm, failed_flags);
        *vpp = NULL;
        return (ENOENT);
}
2108 
2109 /*
2110  * Given a directory node, mark all nodes beneath as
2111  * STALE, i.e. nodes that don't exist as far as new
2112  * consumers are concerned.  Remove them from the
2113  * list of directory entries so that no lookup or
2114  * directory traversal will find them.  The node
2115  * not deallocated so existing holds are not affected.
2116  */
2117 void
2118 sdev_stale(struct sdev_node *ddv)
2119 {
2120         struct sdev_node *dv;
2121         struct vnode *vp;
2122 
2123         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2124 
2125         rw_enter(&ddv->sdev_contents, RW_WRITER);
2126         while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2127                 vp = SDEVTOV(dv);
2128                 SDEV_HOLD(dv);
2129                 if (vp->v_type == VDIR)
2130                         sdev_stale(dv);
2131 
2132                 sdev_dirdelete(ddv, dv);
2133                 SDEV_RELE(dv);
2134         }
2135         ddv->sdev_flags |= SDEV_BUILD;
2136         rw_exit(&ddv->sdev_contents);
2137 }
2138 
2139 /*
2140  * Given a directory node, clean out all the nodes beneath.
2141  * If expr is specified, clean node with names matching expr.
2142  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2143  *      so they are excluded from future lookups.
2144  */
2145 int
2146 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2147 {
2148         int error = 0;
2149         int busy = 0;
2150         struct vnode *vp;
2151         struct sdev_node *dv, *next;
2152         int bkstore = 0;
2153         int len = 0;
2154         char *bks_name = NULL;
2155 
2156         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2157 
2158         /*
2159          * We try our best to destroy all unused sdev_node's
2160          */
2161         rw_enter(&ddv->sdev_contents, RW_WRITER);
2162         for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) {
2163                 next = SDEV_NEXT_ENTRY(ddv, dv);
2164                 vp = SDEVTOV(dv);
2165 
2166                 if (expr && gmatch(dv->sdev_name, expr) == 0)
2167                         continue;
2168 
2169                 if (vp->v_type == VDIR &&
2170                     sdev_cleandir(dv, NULL, flags) != 0) {
2171                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2172                             dv->sdev_name));
2173                         busy++;
2174                         continue;
2175                 }
2176 
2177                 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2178                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2179                             dv->sdev_name));
2180                         busy++;
2181                         continue;
2182                 }
2183 
2184                 /*
2185                  * at this point, either dv is not held or SDEV_ENFORCE
2186                  * is specified. In either case, dv needs to be deleted
2187                  */
2188                 SDEV_HOLD(dv);
2189 
2190                 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2191                 if (bkstore && (vp->v_type == VDIR))
2192                         bkstore += 1;
2193 
2194                 if (bkstore) {
2195                         len = strlen(dv->sdev_name) + 1;
2196                         bks_name = kmem_alloc(len, KM_SLEEP);
2197                         bcopy(dv->sdev_name, bks_name, len);
2198                 }
2199 
2200                 sdev_dirdelete(ddv, dv);
2201 
2202                 /* take care the backing store clean up */
2203                 if (bkstore) {
2204                         ASSERT(bks_name);
2205                         ASSERT(ddv->sdev_attrvp);
2206 
2207                         if (bkstore == 1) {
2208                                 error = VOP_REMOVE(ddv->sdev_attrvp,
2209                                     bks_name, kcred, NULL, 0);
2210                         } else if (bkstore == 2) {
2211                                 error = VOP_RMDIR(ddv->sdev_attrvp,
2212                                     bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2213                         }
2214 
2215                         /* do not propagate the backing store errors */
2216                         if (error) {
2217                                 sdcmn_err9(("sdev_cleandir: backing store"
2218                                     "not cleaned\n"));
2219                                 error = 0;
2220                         }
2221 
2222                         bkstore = 0;
2223                         kmem_free(bks_name, len);
2224                         bks_name = NULL;
2225                         len = 0;
2226                 }
2227 
2228                 ddv->sdev_flags |= SDEV_BUILD;
2229                 SDEV_RELE(dv);
2230         }
2231 
2232         ddv->sdev_flags |= SDEV_BUILD;
2233         rw_exit(&ddv->sdev_contents);
2234 
2235         if (busy) {
2236                 error = EBUSY;
2237         }
2238 
2239         return (error);
2240 }
2241 
2242 /*
2243  * a convenient wrapper for readdir() funcs
2244  */
2245 size_t
2246 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2247 {
2248         size_t reclen = DIRENT64_RECLEN(strlen(nm));
2249         if (reclen > size)
2250                 return (0);
2251 
2252         de->d_ino = (ino64_t)ino;
2253         de->d_off = (off64_t)off + 1;
2254         de->d_reclen = (ushort_t)reclen;
2255         (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2256         return (reclen);
2257 }
2258 
2259 /*
2260  * sdev_mount service routines
2261  */
2262 int
2263 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2264 {
2265         int     error;
2266 
2267         if (uap->datalen != sizeof (*args))
2268                 return (EINVAL);
2269 
2270         if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2271                 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2272                     "get user data. error %d\n", error);
2273                 return (EFAULT);
2274         }
2275 
2276         return (0);
2277 }
2278 
/*
 * Step from the dirent64 record at dp to the one that follows it, using
 * the record's own d_reclen as the distance.  Any earlier definition of
 * nextdp is discarded first.
 */
#undef nextdp
#define nextdp(dp) \
        ((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2284 
2285 /*
2286  * readdir helper func
2287  */
2288 int
2289 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2290     int flags)
2291 {
2292         struct sdev_node *ddv = VTOSDEV(vp);
2293         struct sdev_node *dv;
2294         dirent64_t      *dp;
2295         ulong_t         outcount = 0;
2296         size_t          namelen;
2297         ulong_t         alloc_count;
2298         void            *outbuf;
2299         struct iovec    *iovp;
2300         int             error = 0;
2301         size_t          reclen;
2302         offset_t        diroff;
2303         offset_t        soff;
2304         int             this_reclen;
2305         int (*vtor)(struct sdev_node *) = NULL;
2306         struct vattr attr;
2307         timestruc_t now;
2308 
2309         ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2310         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2311 
2312         if (uiop->uio_loffset >= MAXOFF_T) {
2313                 if (eofp)
2314                         *eofp = 1;
2315                 return (0);
2316         }
2317 
2318         if (uiop->uio_iovcnt != 1)
2319                 return (EINVAL);
2320 
2321         if (vp->v_type != VDIR)
2322                 return (ENOTDIR);
2323 
2324         if (ddv->sdev_flags & SDEV_VTOR) {
2325                 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2326                 ASSERT(vtor);
2327         }
2328 
2329         if (eofp != NULL)
2330                 *eofp = 0;
2331 
2332         soff = uiop->uio_loffset;
2333         iovp = uiop->uio_iov;
2334         alloc_count = iovp->iov_len;
2335         dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2336         outcount = 0;
2337 
2338         if (ddv->sdev_state == SDEV_ZOMBIE)
2339                 goto get_cache;
2340 
2341         if (SDEV_IS_GLOBAL(ddv)) {
2342 
2343                 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2344                     !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2345                     !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2346                     ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2347                     !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2348                     !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2349                     !sdev_reconfig_disable) {
2350                         /*
2351                          * invoking "devfsadm" to do system device reconfig
2352                          */
2353                         mutex_enter(&ddv->sdev_lookup_lock);
2354                         SDEV_BLOCK_OTHERS(ddv,
2355                             (SDEV_READDIR|SDEV_LGWAITING));
2356                         mutex_exit(&ddv->sdev_lookup_lock);
2357 
2358                         sdcmn_err8(("readdir of %s by %s: reconfig\n",
2359                             ddv->sdev_path, curproc->p_user.u_comm));
2360                         if (sdev_reconfig_verbose) {
2361                                 cmn_err(CE_CONT,
2362                                     "?readdir of %s by %s: reconfig\n",
2363                                     ddv->sdev_path, curproc->p_user.u_comm);
2364                         }
2365 
2366                         sdev_devfsadmd_thread(ddv, NULL, kcred);
2367                 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2368                         /*
2369                          * compensate the "ls" started later than "devfsadm"
2370                          */
2371                         mutex_enter(&ddv->sdev_lookup_lock);
2372                         SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2373                         mutex_exit(&ddv->sdev_lookup_lock);
2374                 }
2375 
2376                 /*
2377                  * release the contents lock so that
2378                  * the cache may be updated by devfsadmd
2379                  */
2380                 rw_exit(&ddv->sdev_contents);
2381                 mutex_enter(&ddv->sdev_lookup_lock);
2382                 if (SDEV_IS_READDIR(ddv))
2383                         (void) sdev_wait4lookup(ddv, SDEV_READDIR);
2384                 mutex_exit(&ddv->sdev_lookup_lock);
2385                 rw_enter(&ddv->sdev_contents, RW_READER);
2386 
2387                 sdcmn_err4(("readdir of directory %s by %s\n",
2388                     ddv->sdev_name, curproc->p_user.u_comm));
2389                 if (ddv->sdev_flags & SDEV_BUILD) {
2390                         if (SDEV_IS_PERSIST(ddv)) {
2391                                 error = sdev_filldir_from_store(ddv,
2392                                     alloc_count, cred);
2393                         }
2394                         ddv->sdev_flags &= ~SDEV_BUILD;
2395                 }
2396         }
2397 
2398 get_cache:
2399         /* handle "." and ".." */
2400         diroff = 0;
2401         if (soff == 0) {
2402                 /* first time */
2403                 this_reclen = DIRENT64_RECLEN(1);
2404                 if (alloc_count < this_reclen) {
2405                         error = EINVAL;
2406                         goto done;
2407                 }
2408 
2409                 dp->d_ino = (ino64_t)ddv->sdev_ino;
2410                 dp->d_off = (off64_t)1;
2411                 dp->d_reclen = (ushort_t)this_reclen;
2412 
2413                 (void) strncpy(dp->d_name, ".",
2414                     DIRENT64_NAMELEN(this_reclen));
2415                 outcount += dp->d_reclen;
2416                 dp = nextdp(dp);
2417         }
2418 
2419         diroff++;
2420         if (soff <= 1) {
2421                 this_reclen = DIRENT64_RECLEN(2);
2422                 if (alloc_count < outcount + this_reclen) {
2423                         error = EINVAL;
2424                         goto done;
2425                 }
2426 
2427                 dp->d_reclen = (ushort_t)this_reclen;
2428                 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2429                 dp->d_off = (off64_t)2;
2430 
2431                 (void) strncpy(dp->d_name, "..",
2432                     DIRENT64_NAMELEN(this_reclen));
2433                 outcount += dp->d_reclen;
2434 
2435                 dp = nextdp(dp);
2436         }
2437 
2438 
2439         /* gets the cache */
2440         diroff++;
2441         for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2442             dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2443                 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2444                     diroff, soff, dv->sdev_name));
2445 
2446                 /* bypassing pre-matured nodes */
2447                 if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2448                         sdcmn_err3(("sdev_readdir: pre-mature node  "
2449                             "%s %d\n", dv->sdev_name, dv->sdev_state));
2450                         continue;
2451                 }
2452 
2453                 /*
2454                  * Check validity of node
2455                  * Drop invalid and nodes to be skipped.
2456                  * A node the validator indicates as stale needs
2457                  * to be returned as presumably the node name itself
2458                  * is valid and the node data itself will be refreshed
2459                  * on lookup.  An application performing a readdir then
2460                  * stat on each entry should thus always see consistent
2461                  * data.  In any case, it is not possible to synchronize
2462                  * with dynamic kernel state, and any view we return can
2463                  * never be anything more than a snapshot at a point in time.
2464                  */
2465                 if (vtor) {
2466                         switch (vtor(dv)) {
2467                         case SDEV_VTOR_VALID:
2468                                 break;
2469                         case SDEV_VTOR_INVALID:
2470                         case SDEV_VTOR_SKIP:
2471                                 continue;
2472                         case SDEV_VTOR_STALE:
2473                                 sdcmn_err3(("sdev_readir: %s stale\n",
2474                                     dv->sdev_name));
2475                                 break;
2476                         default:
2477                                 cmn_err(CE_PANIC,
2478                                     "dev fs: validator failed: %s(%p)\n",
2479                                     dv->sdev_name, (void *)dv);
2480                                 break;
2481                         /*NOTREACHED*/
2482                         }
2483                 }
2484 
2485                 namelen = strlen(dv->sdev_name);
2486                 reclen = DIRENT64_RECLEN(namelen);
2487                 if (outcount + reclen > alloc_count) {
2488                         goto full;
2489                 }
2490                 dp->d_reclen = (ushort_t)reclen;
2491                 dp->d_ino = (ino64_t)dv->sdev_ino;
2492                 dp->d_off = (off64_t)diroff + 1;
2493                 (void) strncpy(dp->d_name, dv->sdev_name,
2494                     DIRENT64_NAMELEN(reclen));
2495                 outcount += reclen;
2496                 dp = nextdp(dp);
2497         }
2498 
2499 full:
2500         sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2501             "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2502             (void *)dv));
2503 
2504         if (outcount)
2505                 error = uiomove(outbuf, outcount, UIO_READ, uiop);
2506 
2507         if (!error) {
2508                 uiop->uio_loffset = diroff;
2509                 if (eofp)
2510                         *eofp = dv ? 0 : 1;
2511         }
2512 
2513 
2514         if (ddv->sdev_attrvp) {
2515                 gethrestime(&now);
2516                 attr.va_ctime = now;
2517                 attr.va_atime = now;
2518                 attr.va_mask = AT_CTIME|AT_ATIME;
2519 
2520                 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2521         }
2522 done:
2523         kmem_free(outbuf, alloc_count);
2524         return (error);
2525 }
2526 
2527 static int
2528 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2529 {
2530         vnode_t *vp;
2531         vnode_t *cvp;
2532         struct sdev_node *svp;
2533         char *nm;
2534         struct pathname pn;
2535         int error;
2536         int persisted = 0;
2537 
2538         ASSERT(INGLOBALZONE(curproc));
2539 
2540         if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2541                 return (error);
2542         nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2543 
2544         vp = rootdir;
2545         VN_HOLD(vp);
2546 
2547         while (pn_pathleft(&pn)) {
2548                 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2549                 (void) pn_getcomponent(&pn, nm);
2550 
2551                 /*
2552                  * Deal with the .. special case where we may be
2553                  * traversing up across a mount point, to the
2554                  * root of this filesystem or global root.
2555                  */
2556                 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2557 checkforroot:
2558                         if (VN_CMP(vp, rootdir)) {
2559                                 nm[1] = 0;
2560                         } else if (vp->v_flag & VROOT) {
2561                                 vfs_t *vfsp;
2562                                 cvp = vp;
2563                                 vfsp = cvp->v_vfsp;
2564                                 vfs_rlock_wait(vfsp);
2565                                 vp = cvp->v_vfsp->vfs_vnodecovered;
2566                                 if (vp == NULL ||
2567                                     (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2568                                         vfs_unlock(vfsp);
2569                                         VN_RELE(cvp);
2570                                         error = EIO;
2571                                         break;
2572                                 }
2573                                 VN_HOLD(vp);
2574                                 vfs_unlock(vfsp);
2575                                 VN_RELE(cvp);
2576                                 cvp = NULL;
2577                                 goto checkforroot;
2578                         }
2579                 }
2580 
2581                 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2582                     NULL, NULL);
2583                 if (error) {
2584                         VN_RELE(vp);
2585                         break;
2586                 }
2587 
2588                 /* traverse mount points encountered on our journey */
2589                 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2590                         VN_RELE(vp);
2591                         VN_RELE(cvp);
2592                         break;
2593                 }
2594 
2595                 /*
2596                  * symbolic link, can be either relative and absolute
2597                  */
2598                 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2599                         struct pathname linkpath;
2600                         pn_alloc(&linkpath);
2601                         if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2602                                 pn_free(&linkpath);
2603                                 break;
2604                         }
2605                         if (pn_pathleft(&linkpath) == 0)
2606                                 (void) pn_set(&linkpath, ".");
2607                         error = pn_insert(&pn, &linkpath, strlen(nm));
2608                         pn_free(&linkpath);
2609                         if (pn.pn_pathlen == 0) {
2610                                 VN_RELE(vp);
2611                                 return (ENOENT);
2612                         }
2613                         if (pn.pn_path[0] == '/') {
2614                                 pn_skipslash(&pn);
2615                                 VN_RELE(vp);
2616                                 VN_RELE(cvp);
2617                                 vp = rootdir;
2618                                 VN_HOLD(vp);
2619                         } else {
2620                                 VN_RELE(cvp);
2621                         }
2622                         continue;
2623                 }
2624 
2625                 VN_RELE(vp);
2626 
2627                 /*
2628                  * Direct the operation to the persisting filesystem
2629                  * underlying /dev.  Bail if we encounter a
2630                  * non-persistent dev entity here.
2631                  */
2632                 if (cvp->v_vfsp->vfs_fstype == devtype) {
2633 
2634                         if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2635                                 error = ENOENT;
2636                                 VN_RELE(cvp);
2637                                 break;
2638                         }
2639 
2640                         if (VTOSDEV(cvp) == NULL) {
2641                                 error = ENOENT;
2642                                 VN_RELE(cvp);
2643                                 break;
2644                         }
2645                         svp = VTOSDEV(cvp);
2646                         if ((vp = svp->sdev_attrvp) == NULL) {
2647                                 error = ENOENT;
2648                                 VN_RELE(cvp);
2649                                 break;
2650                         }
2651                         persisted = 1;
2652                         VN_HOLD(vp);
2653                         VN_RELE(cvp);
2654                         cvp = vp;
2655                 }
2656 
2657                 vp = cvp;
2658                 pn_skipslash(&pn);
2659         }
2660 
2661         kmem_free(nm, MAXNAMELEN);
2662         pn_free(&pn);
2663 
2664         if (error)
2665                 return (error);
2666 
2667         /*
2668          * Only return persisted nodes in the filesystem underlying /dev.
2669          */
2670         if (!persisted) {
2671                 VN_RELE(vp);
2672                 return (ENOENT);
2673         }
2674 
2675         *r_vp = vp;
2676         return (0);
2677 }
2678 
2679 int
2680 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2681         int *npathsp, int *npathsp_alloc, int checking_empty)
2682 {
2683         char    **pathlist = NULL;
2684         char    **newlist = NULL;
2685         int     npaths = 0;
2686         int     npaths_alloc = 0;
2687         dirent64_t *dbuf = NULL;
2688         int     n;
2689         char    *s;
2690         int error;
2691         vnode_t *vp;
2692         int eof;
2693         struct iovec iov;
2694         struct uio uio;
2695         struct dirent64 *dp;
2696         size_t dlen;
2697         size_t dbuflen;
2698         int ndirents = 64;
2699         char *nm;
2700 
2701         error = sdev_modctl_lookup(dir, &vp);
2702         sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2703             dir, curproc->p_user.u_comm,
2704             (error == 0) ? "ok" : "failed"));
2705         if (error)
2706                 return (error);
2707 
2708         dlen = ndirents * (sizeof (*dbuf));
2709         dbuf = kmem_alloc(dlen, KM_SLEEP);
2710 
2711         uio.uio_iov = &iov;
2712         uio.uio_iovcnt = 1;
2713         uio.uio_segflg = UIO_SYSSPACE;
2714         uio.uio_fmode = 0;
2715         uio.uio_extflg = UIO_COPY_CACHED;
2716         uio.uio_loffset = 0;
2717         uio.uio_llimit = MAXOFFSET_T;
2718 
2719         eof = 0;
2720         error = 0;
2721         while (!error && !eof) {
2722                 uio.uio_resid = dlen;
2723                 iov.iov_base = (char *)dbuf;
2724                 iov.iov_len = dlen;
2725 
2726                 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2727                 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2728                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2729 
2730                 dbuflen = dlen - uio.uio_resid;
2731 
2732                 if (error || dbuflen == 0)
2733                         break;
2734 
2735                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2736                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2737 
2738                         nm = dp->d_name;
2739 
2740                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2741                                 continue;
2742                         if (npaths == npaths_alloc) {
2743                                 npaths_alloc += 64;
2744                                 newlist = (char **)
2745                                     kmem_zalloc((npaths_alloc + 1) *
2746                                     sizeof (char *), KM_SLEEP);
2747                                 if (pathlist) {
2748                                         bcopy(pathlist, newlist,
2749                                             npaths * sizeof (char *));
2750                                         kmem_free(pathlist,
2751                                             (npaths + 1) * sizeof (char *));
2752                                 }
2753                                 pathlist = newlist;
2754                         }
2755                         n = strlen(nm) + 1;
2756                         s = kmem_alloc(n, KM_SLEEP);
2757                         bcopy(nm, s, n);
2758                         pathlist[npaths++] = s;
2759                         sdcmn_err11(("  %s/%s\n", dir, s));
2760 
2761                         /* if checking empty, one entry is as good as many */
2762                         if (checking_empty) {
2763                                 eof = 1;
2764                                 break;
2765                         }
2766                 }
2767         }
2768 
2769 exit:
2770         VN_RELE(vp);
2771 
2772         if (dbuf)
2773                 kmem_free(dbuf, dlen);
2774 
2775         if (error)
2776                 return (error);
2777 
2778         *dirlistp = pathlist;
2779         *npathsp = npaths;
2780         *npathsp_alloc = npaths_alloc;
2781 
2782         return (0);
2783 }
2784 
2785 void
2786 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
2787 {
2788         int     i, n;
2789 
2790         for (i = 0; i < npaths; i++) {
2791                 n = strlen(pathlist[i]) + 1;
2792                 kmem_free(pathlist[i], n);
2793         }
2794 
2795         kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
2796 }
2797 
2798 int
2799 sdev_modctl_devexists(const char *path)
2800 {
2801         vnode_t *vp;
2802         int error;
2803 
2804         error = sdev_modctl_lookup(path, &vp);
2805         sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2806             path, curproc->p_user.u_comm,
2807             (error == 0) ? "ok" : "failed"));
2808         if (error == 0)
2809                 VN_RELE(vp);
2810 
2811         return (error);
2812 }
2813 
2814 /*
2815  * a generic setattr() function
2816  *
2817  * note: flags only supports AT_UID and AT_GID.
2818  *       Future enhancements can be done for other types, e.g. AT_MODE
2819  */
2820 int
2821 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2822     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2823     int), int protocol)
2824 {
2825         struct sdev_node        *dv = VTOSDEV(vp);
2826         struct sdev_node        *parent = dv->sdev_dotdot;
2827         struct vattr            *get;
2828         uint_t                  mask = vap->va_mask;
2829         int                     error;
2830 
2831         /* some sanity checks */
2832         if (vap->va_mask & AT_NOSET)
2833                 return (EINVAL);
2834 
2835         if (vap->va_mask & AT_SIZE) {
2836                 if (vp->v_type == VDIR) {
2837                         return (EISDIR);
2838                 }
2839         }
2840 
2841         /* no need to set attribute, but do not fail either */
2842         ASSERT(parent);
2843         rw_enter(&parent->sdev_contents, RW_READER);
2844         if (dv->sdev_state == SDEV_ZOMBIE) {
2845                 rw_exit(&parent->sdev_contents);
2846                 return (0);
2847         }
2848 
2849         /* If backing store exists, just set it. */
2850         if (dv->sdev_attrvp) {
2851                 rw_exit(&parent->sdev_contents);
2852                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2853         }
2854 
2855         /*
2856          * Otherwise, for nodes with the persistence attribute, create it.
2857          */
2858         ASSERT(dv->sdev_attr);
2859         if (SDEV_IS_PERSIST(dv) ||
2860             ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
2861                 sdev_vattr_merge(dv, vap);
2862                 rw_enter(&dv->sdev_contents, RW_WRITER);
2863                 error = sdev_shadow_node(dv, cred);
2864                 rw_exit(&dv->sdev_contents);
2865                 rw_exit(&parent->sdev_contents);
2866 
2867                 if (error)
2868                         return (error);
2869                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2870         }
2871 
2872 
2873         /*
2874          * sdev_attr was allocated in sdev_mknode
2875          */
2876         rw_enter(&dv->sdev_contents, RW_WRITER);
2877         error = secpolicy_vnode_setattr(cred, vp, vap,
2878             dv->sdev_attr, flags, sdev_unlocked_access, dv);
2879         if (error) {
2880                 rw_exit(&dv->sdev_contents);
2881                 rw_exit(&parent->sdev_contents);
2882                 return (error);
2883         }
2884 
2885         get = dv->sdev_attr;
2886         if (mask & AT_MODE) {
2887                 get->va_mode &= S_IFMT;
2888                 get->va_mode |= vap->va_mode & ~S_IFMT;
2889         }
2890 
2891         if ((mask & AT_UID) || (mask & AT_GID)) {
2892                 if (mask & AT_UID)
2893                         get->va_uid = vap->va_uid;
2894                 if (mask & AT_GID)
2895                         get->va_gid = vap->va_gid;
2896                 /*
2897                  * a callback must be provided if the protocol is set
2898                  */
2899                 if ((protocol & AT_UID) || (protocol & AT_GID)) {
2900                         ASSERT(callback);
2901                         error = callback(dv, get, protocol);
2902                         if (error) {
2903                                 rw_exit(&dv->sdev_contents);
2904                                 rw_exit(&parent->sdev_contents);
2905                                 return (error);
2906                         }
2907                 }
2908         }
2909 
2910         if (mask & AT_ATIME)
2911                 get->va_atime = vap->va_atime;
2912         if (mask & AT_MTIME)
2913                 get->va_mtime = vap->va_mtime;
2914         if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
2915                 gethrestime(&get->va_ctime);
2916         }
2917 
2918         sdev_vattr_merge(dv, get);
2919         rw_exit(&dv->sdev_contents);
2920         rw_exit(&parent->sdev_contents);
2921         return (0);
2922 }
2923 
2924 /*
2925  * a generic inactive() function
2926  */
2927 /*ARGSUSED*/
2928 void
2929 devname_inactive_func(struct vnode *vp, struct cred *cred,
2930     void (*callback)(struct vnode *))
2931 {
2932         int clean;
2933         struct sdev_node *dv = VTOSDEV(vp);
2934         int state;
2935 
2936         mutex_enter(&vp->v_lock);
2937         ASSERT(vp->v_count >= 1);
2938 
2939 
2940         if (vp->v_count == 1 && callback != NULL)
2941                 callback(vp);
2942 
2943         rw_enter(&dv->sdev_contents, RW_WRITER);
2944         state = dv->sdev_state;
2945 
2946         clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
2947 
2948         /*
2949          * sdev is a rather bad public citizen. It violates the general
2950          * agreement that in memory nodes should always have a valid reference
2951          * count on their vnode. But that's not the case here. This means that
2952          * we do actually have to distinguish between getting inactive callbacks
2953          * for zombies and otherwise. This should probably be fixed.
2954          */
2955         if (clean) {
2956                 /* Remove the . entry to ourselves */
2957                 if (vp->v_type == VDIR) {
2958                         decr_link(dv);
2959                 }
2960                 VERIFY(dv->sdev_nlink == 1);
2961                 decr_link(dv);
2962                 --vp->v_count;
2963                 rw_exit(&dv->sdev_contents);
2964                 mutex_exit(&vp->v_lock);
2965                 sdev_nodedestroy(dv, 0);
2966         } else {
2967                 --vp->v_count;
2968                 rw_exit(&dv->sdev_contents);
2969                 mutex_exit(&vp->v_lock);
2970         }
2971 }