1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /*
  25  * Copyright 2016, Joyent, Inc.
  26  */
  27 
  28 /*
  29  * vnode ops for the /dev filesystem
  30  *
 * - VDIR, VCHR, VBLK, and VLNK are considered must-support file types
  32  * - VREG and VDOOR are used for some internal implementations in
  33  *    the global zone, e.g. devname and devfsadm communication
  34  * - other file types are unusual in this namespace and
  35  *    not supported for now
  36  */
  37 
  38 /*
  39  * sdev has a few basic goals:
  40  *   o Provide /dev for the global zone as well as various non-global zones.
  41  *   o Provide the basic functionality that devfsadm might need (mknod,
  42  *     symlinks, etc.)
  43  *   o Allow persistent permissions on files in /dev.
  44  *   o Allow for dynamic directories and nodes for use by various services (pts,
  45  *     zvol, net, etc.)
  46  *
  47  * The sdev file system is primarily made up of sdev_node_t's which is sdev's
  48  * counterpart to the vnode_t. There are two different classes of sdev_node_t's
  49  * that we generally care about, dynamic and otherwise.
  50  *
  51  * Persisting Information
  52  * ----------------------
  53  *
  54  * When sdev is mounted, it keeps track of the underlying file system it is
  55  * mounted over. In certain situations, sdev will go and create entries in that
  56  * underlying file system. These underlying 'back end' nodes are used as proxies
  57  * for various changes in permissions. While specific sets of nodes, such as
  58  * dynamic ones, are exempt, this process stores permission changes against
  59  * these back end nodes. The point of all of this is to allow for these settings
  60  * to persist across host and zone reboots. As an example, consider the entry
  61  * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon
  62  * changing the permissions on c0t0d0 you'd have the following logical
  63  * relationships:
  64  *
  65  *    +------------------+   sdev_vnode     +--------------+
  66  *    | sdev_node_t      |<---------------->| vnode_t      |
  67  *    | /dev/dsk/c0t0d0  |<---------------->| for sdev     |
  68  *    +------------------+                  +--------------+
  69  *           |
  70  *           | sdev_attrvp
  71  *           |
  72  *           |    +---------------------+
  73  *           +--->| vnode_t for UFS|ZFS |
  74  *                | /dev/dsk/c0t0d0     |
  75  *                +---------------------+
  76  *
  77  * sdev is generally in memory. Therefore when a lookup happens and there is no
  78  * entry already inside of a directory cache, it will next check the backing
  79  * store. If the backing store exists, we will reconstitute the sdev_node based
  80  * on the information that we persisted. When we create the backing store node,
  81  * we use the struct vattr information that we already have in sdev_node_t.
  82  * Because of this, we already know if the entry was previously a symlink,
  83  * directory, or some other kind of type. Note that not all types of nodes are
  84  * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are
  85  * eligible to be persisted.
  86  *
  87  * When the sdev_node is created and the lookup is done, we grab a hold on the
  88  * underlying vnode as part of the call to VOP_LOOKUP. That reference is held
  89  * until the sdev_node becomes inactive. Once its reference count reaches one
  90  * and the VOP_INACTIVE callback fires leading to the destruction of the node,
  91  * the reference on the underlying vnode will be released.
  92  *
  93  * The backing store node will be deleted only when the node itself is deleted
  94  * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call.
  95  *
  96  * Not everything can be persisted, see The Rules section for more details.
  97  *
  98  * Dynamic Nodes
  99  * -------------
 100  *
 101  * Dynamic nodes allow for specific interactions with various kernel subsystems
 102  * when looking up directory entries. This allows the lookup and readdir
 * functions to check against the kernel subsystems for validity, e.g. does a
 * zvol or NIC still exist.
 105  *
 106  * More specifically, when we create various directories we check if the
 107  * directory name matches that of one of the names in the vtab[] (sdev_subr.c).
 108  * If it does, we swap out the vnode operations into a new set which combine the
 109  * normal sdev vnode operations with the dynamic set here.
 110  *
 111  * In addition, various dynamic nodes implement a verification entry point. This
 112  * verification entry is used as a part of lookup and readdir. The goal for
 113  * these dynamic nodes is to allow them to check with the underlying subsystems
 114  * to ensure that these devices are still present, or if they have gone away, to
 115  * remove them from the results. This is indicated by using the SDEV_VTOR flag
 116  * in vtab[].
 117  *
 118  * Dynamic nodes have additional restrictions placed upon them. They may only
 119  * appear at the top level directory of the file system. In addition, users
 * cannot create dirents below any level of a dynamic node aside from its
 * special vnops.
 122  *
 123  * Profiles
 124  * --------
 125  *
 126  * Profiles exist for the purpose of non-global zones. They work with the zone
 127  * brands and zoneadmd to set up a filter of allowed devices that can appear in
 128  * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a
 129  * modctl system call. Specifically it allows one to add patterns of device
 130  * paths to include and exclude. It allows for a collection of symlinks to be
 131  * added and it allows for remapping names.
 132  *
 133  * When operating in a non-global zone, several of the sdev vnops are redirected
 134  * to the profile versions. These impose additional restrictions such as
 135  * enforcing that a non-global zone's /dev is read only.
 136  *
 137  * sdev_node_t States
 138  * ------------------
 139  *
 140  * A given sdev_node_t has a field called the sdev_state which describes where
 141  * in the sdev life cycle it is. There are three primary states: SDEV_INIT,
 142  * SDEV_READY, and SDEV_ZOMBIE.
 143  *
 144  *      SDEV_INIT: When a new /dev file is first looked up, a sdev_node
 145  *                 is allocated, initialized and added to the directory's
 146  *                 sdev_node cache. A node at this state will also
 147  *                 have the SDEV_LOOKUP flag set.
 148  *
 149  *                 Other threads that are trying to look up a node at
 150  *                 this state will be blocked until the SDEV_LOOKUP flag
 151  *                 is cleared.
 152  *
 153  *                 When the SDEV_LOOKUP flag is cleared, the node may
 154  *                 transition into the SDEV_READY state for a successful
 155  *                 lookup or the node is removed from the directory cache
 156  *                 and destroyed if the named node can not be found.
 157  *                 An ENOENT error is returned for the second case.
 158  *
 159  *      SDEV_READY: A /dev file has been successfully looked up and
 160  *                  associated with a vnode. The /dev file is available
 161  *                  for the supported /dev file system operations.
 162  *
 163  *      SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued
 164  *                  to an SDEV_READY node. The node is transitioned into
 165  *                  the SDEV_ZOMBIE state if the vnode reference count
 166  *                  is still held. A SDEV_ZOMBIE node does not support
 167  *                  any of the /dev file system operations. A SDEV_ZOMBIE
 168  *                  node is immediately removed from the directory cache
 169  *                  and destroyed once the reference count reaches zero.
 170  *
 171  * Historically nodes that were marked SDEV_ZOMBIE were not removed from the
 172  * underlying directory caches. This has been the source of numerous bugs and
 173  * thus to better mimic what happens on a real file system, it is no longer the
 174  * case.
 175  *
 176  * The following state machine describes the life cycle of a given node and its
 177  * associated states:
 178  *
 179  * node is . . . . .
 180  * allocated via   .     +-------------+         . . . . . . . vnode_t refcount
 181  * sdev_nodeinit() .     | Unallocated |         .             reaches zero and
 182  *        +--------*-----|   Memory    |<--------*---+         sdev_inactive is
 183  *        |              +-------------+             |         called.
 *        |       +------------^                     |
 185  *        v       |                                  |
 186  *  +-----------+ * . . sdev_nodeready()      +-------------+
 187  *  | SDEV_INIT | |     or related setup      | SDEV_ZOMBIE |
 188  *  +-----------+ |     failure               +-------------+
 189  *        |       |                                  ^
 190  *        |       |      +------------+              |
 191  *        +-*----------->| SDEV_READY |--------*-----+
 192  *          .            +------------+        .          The node is no longer
 193  *          . . node successfully              . . . . .  valid or we've been
 194  *              inserted into the                         asked to remove it.
 195  *              directory cache                           This happens via
 *              and sdev_nodeready()                      sdev_dirdelete().
 197  *              call successful.
 198  *
 199  * Adding and Removing Dirents, Zombie Nodes
 200  * -----------------------------------------
 201  *
 202  * As part of doing a lookup, readdir, or an explicit creation operation like
 203  * mkdir or create, nodes may be created. Every directory has an avl tree which
 204  * contains its children, the sdev_entries tree. This is only used if the type
 205  * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and
 206  * it is managed through sdev_cache_update().
 207  *
 208  * Every sdev_node_t has a field sdev_state, which describes the current state
 209  * of the node. A node is generally speaking in the SDEV_READY state. When it is
 210  * there, it can be looked up, accessed, and operations performed on it. When a
 211  * node is going to be removed from the directory cache it is marked as a
 212  * zombie. Once a node becomes a zombie, no other file system operations will
 213  * succeed and it will continue to exist as a node until the vnode count on the
 214  * node reaches zero. At that point, the node will be freed.  However, once a
 215  * node has been marked as a zombie, it will be removed immediately from the
 216  * directory cache such that no one else may find it again.  This means that
 217  * someone else can insert a new entry into that directory with the same name
 218  * and without a problem.
 219  *
 220  * To remove a node, see the section on that in The Rules.
 221  *
 222  * The Rules
 223  * ---------
 224  * These are the rules to live by when working in sdev. These are not
 225  * exhaustive.
 226  *
 227  * - Set 1: Working with Backing Nodes
 228  *   o If there is a SDEV_READY sdev_node_t, it knows about its backing node.
 229  *   o If we find a backing node when looking up an sdev_node_t for the first
 230  *     time, we use its attributes to build our sdev_node_t.
 231  *   o If there is a found backing node, or we create a backing node, that's
 232  *     when we grab the hold on its vnode.
 233  *   o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from
 234  *     the underlying file system. It must not be searchable or findable.
 235  *   o We release our hold on the backing node vnode when we destroy the
 236  *     sdev_node_t.
 237  *
 238  * - Set 2: Locking rules for sdev (not exhaustive)
 *   o The majority of nodes contain an sdev_contents rw lock. You must hold it
 *     for read or write, as appropriate, when manipulating its contents.
 241  *   o You must lock your parent before yourself.
 242  *   o If you need your vnode's v_lock and the sdev_contents rw lock, you must
 243  *     grab the v_lock before the sdev_contents rw_lock.
 244  *   o If you release a lock on the node as a part of upgrading it, you must
 245  *     verify that the node has not become a zombie as a part of this process.
 246  *
 247  * - Set 3: Zombie Status and What it Means
 248  *   o If you encounter a node that is a ZOMBIE, that means that it has been
 249  *     unlinked from the backing store.
 250  *   o If you release your contents lock and acquire it again (say as part of
 251  *     trying to grab a write lock) you must check that the node has not become
 252  *     a zombie.
 253  *   o You should VERIFY that a looked up node is not a zombie. This follows
 254  *     from the following logic. To mark something as a zombie means that it is
 *     removed from the parent's directory cache. To do that, you must have a
 256  *     write lock on the parent's sdev_contents. To lookup through that
 257  *     directory you must have a read lock. This then becomes a simple ordering
 258  *     problem. If you've been granted the lock then the other operation cannot
 259  *     be in progress or must have already succeeded.
 260  *
 261  * - Set 4: Removing Directory Entries (aka making nodes Zombies)
 262  *   o Write lock must be held on the directory
 263  *   o Write lock must be held on the node
 264  *   o Remove the sdev_node_t from its parent cache
 265  *   o Remove the corresponding backing store node, if it exists, eg. use
 266  *     VOP_REMOVE or VOP_RMDIR.
 267  *   o You must NOT make any change in the vnode reference count! Nodes should
 268  *     only be cleaned up through VOP_INACTIVE callbacks.
 269  *   o VOP_INACTIVE is the only one responsible for doing the final vn_rele of
 270  *     the backing store vnode that was grabbed during lookup.
 271  *
 272  * - Set 5: What Nodes may be Persisted
 273  *   o The root, /dev is always persisted
 274  *   o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted
 275  *     unless it is also marked SDEV_PERSIST
 276  *   o Anything whose parent directory is marked SDEV_PERSIST will pass that
 277  *     along to the child as long as it does not contradict the above rules
 278  */
 279 
 280 #include <sys/types.h>
 281 #include <sys/param.h>
 282 #include <sys/t_lock.h>
 283 #include <sys/systm.h>
 284 #include <sys/sysmacros.h>
 285 #include <sys/user.h>
 286 #include <sys/time.h>
 287 #include <sys/vfs.h>
 288 #include <sys/vnode.h>
 289 #include <sys/vfs_opreg.h>
 290 #include <sys/file.h>
 291 #include <sys/fcntl.h>
 292 #include <sys/flock.h>
 293 #include <sys/kmem.h>
 294 #include <sys/uio.h>
 295 #include <sys/errno.h>
 296 #include <sys/stat.h>
 297 #include <sys/cred.h>
 298 #include <sys/dirent.h>
 299 #include <sys/pathname.h>
 300 #include <sys/cmn_err.h>
 301 #include <sys/debug.h>
 302 #include <sys/policy.h>
 303 #include <vm/hat.h>
 304 #include <vm/seg_vn.h>
 305 #include <vm/seg_map.h>
 306 #include <vm/seg.h>
 307 #include <vm/as.h>
 308 #include <vm/page.h>
 309 #include <sys/proc.h>
 310 #include <sys/mode.h>
 311 #include <sys/sunndi.h>
 312 #include <sys/ptms.h>
 313 #include <fs/fs_subr.h>
 314 #include <sys/fs/dv_node.h>
 315 #include <sys/fs/sdev_impl.h>
 316 
 317 /*ARGSUSED*/
 318 static int
 319 sdev_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct)
 320 {
 321         struct sdev_node *dv = VTOSDEV(*vpp);
 322         struct sdev_node *ddv = dv->sdev_dotdot;
 323         int error = 0;
 324 
 325         if ((*vpp)->v_type == VDIR)
 326                 return (0);
 327 
 328         if (!SDEV_IS_GLOBAL(dv))
 329                 return (ENOTSUP);
 330 
 331         if ((*vpp)->v_type == VLNK)
 332                 return (ENOENT);
 333         ASSERT((*vpp)->v_type == VREG);
 334         if ((*vpp)->v_type != VREG)
 335                 return (ENOTSUP);
 336 
 337         ASSERT(ddv);
 338         rw_enter(&ddv->sdev_contents, RW_READER);
 339         if (dv->sdev_attrvp == NULL) {
 340                 rw_exit(&ddv->sdev_contents);
 341                 return (ENOENT);
 342         }
 343         error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred, ct);
 344         rw_exit(&ddv->sdev_contents);
 345         return (error);
 346 }
 347 
 348 /*ARGSUSED1*/
 349 static int
 350 sdev_close(struct vnode *vp, int flag, int count,
 351     offset_t offset, struct cred *cred, caller_context_t *ct)
 352 {
 353         struct sdev_node *dv = VTOSDEV(vp);
 354 
 355         if (vp->v_type == VDIR) {
 356                 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 357                 cleanshares(vp, ttoproc(curthread)->p_pid);
 358                 return (0);
 359         }
 360 
 361         if (!SDEV_IS_GLOBAL(dv))
 362                 return (ENOTSUP);
 363 
 364         ASSERT(vp->v_type == VREG);
 365         if (vp->v_type != VREG)
 366                 return (ENOTSUP);
 367 
 368         ASSERT(dv->sdev_attrvp);
 369         return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred, ct));
 370 }
 371 
 372 /*ARGSUSED*/
 373 static int
 374 sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred,
 375         struct caller_context *ct)
 376 {
 377         struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp);
 378         int     error;
 379 
 380         if (!SDEV_IS_GLOBAL(dv))
 381                 return (EINVAL);
 382 
 383         if (vp->v_type == VDIR)
 384                 return (EISDIR);
 385 
 386         /* only supporting regular files in /dev */
 387         ASSERT(vp->v_type == VREG);
 388         if (vp->v_type != VREG)
 389                 return (EINVAL);
 390 
 391         ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents));
 392         ASSERT(dv->sdev_attrvp);
 393         (void) VOP_RWLOCK(dv->sdev_attrvp, 0, ct);
 394         error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct);
 395         VOP_RWUNLOCK(dv->sdev_attrvp, 0, ct);
 396         return (error);
 397 }
 398 
 399 /*ARGSUSED*/
 400 static int
 401 sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred,
 402         struct caller_context *ct)
 403 {
 404         struct sdev_node *dv = VTOSDEV(vp);
 405         int     error = 0;
 406 
 407         if (!SDEV_IS_GLOBAL(dv))
 408                 return (EINVAL);
 409 
 410         if (vp->v_type == VDIR)
 411                 return (EISDIR);
 412 
 413         /* only supporting regular files in /dev */
 414         ASSERT(vp->v_type == VREG);
 415         if (vp->v_type != VREG)
 416                 return (EINVAL);
 417 
 418         ASSERT(dv->sdev_attrvp);
 419 
 420         (void) VOP_RWLOCK(dv->sdev_attrvp, 1, ct);
 421         error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct);
 422         VOP_RWUNLOCK(dv->sdev_attrvp, 1, ct);
 423         if (error == 0) {
 424                 sdev_update_timestamps(dv->sdev_attrvp, kcred,
 425                     AT_MTIME);
 426         }
 427         return (error);
 428 }
 429 
 430 /*ARGSUSED*/
 431 static int
 432 sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag,
 433     struct cred *cred, int *rvalp,  caller_context_t *ct)
 434 {
 435         struct sdev_node *dv = VTOSDEV(vp);
 436 
 437         if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR))
 438                 return (ENOTTY);
 439 
 440         ASSERT(vp->v_type == VREG);
 441         if (vp->v_type != VREG)
 442                 return (EINVAL);
 443 
 444         ASSERT(dv->sdev_attrvp);
 445         return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp, ct));
 446 }
 447 
 448 static int
 449 sdev_getattr(struct vnode *vp, struct vattr *vap, int flags,
 450     struct cred *cr, caller_context_t *ct)
 451 {
 452         int                     error = 0;
 453         struct sdev_node        *dv = VTOSDEV(vp);
 454         struct sdev_node        *parent = dv->sdev_dotdot;
 455 
 456         ASSERT(parent);
 457 
 458         rw_enter(&parent->sdev_contents, RW_READER);
 459         ASSERT(dv->sdev_attr || dv->sdev_attrvp);
 460 
 461         /*
 462          * search order:
 463          *      - for persistent nodes (SDEV_PERSIST): backstore
 464          *      - for non-persistent nodes: module ops if global, then memory
 465          */
 466         if (dv->sdev_attrvp) {
 467                 rw_exit(&parent->sdev_contents);
 468                 error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr, ct);
 469                 sdev_vattr_merge(dv, vap);
 470         } else {
 471                 ASSERT(dv->sdev_attr);
 472                 *vap = *dv->sdev_attr;
 473                 sdev_vattr_merge(dv, vap);
 474                 rw_exit(&parent->sdev_contents);
 475         }
 476 
 477         return (error);
 478 }
 479 
 480 /*ARGSUSED4*/
 481 static int
 482 sdev_setattr(struct vnode *vp, struct vattr *vap, int flags,
 483     struct cred *cred, caller_context_t *ctp)
 484 {
 485         return (devname_setattr_func(vp, vap, flags, cred, NULL, 0));
 486 }
 487 
 488 static int
 489 sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
 490     struct cred *cr, caller_context_t *ct)
 491 {
 492         int     error;
 493         struct sdev_node *dv = VTOSDEV(vp);
 494         struct vnode *avp = dv->sdev_attrvp;
 495 
 496         if (avp == NULL) {
 497                 /* return fs_fab_acl() if flavor matches, else do nothing */
 498                 if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED &&
 499                     (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) ||
 500                     (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED &&
 501                     (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE))))
 502                         return (fs_fab_acl(vp, vsap, flags, cr, ct));
 503 
 504                 return (ENOSYS);
 505         }
 506 
 507         (void) VOP_RWLOCK(avp, 1, ct);
 508         error = VOP_GETSECATTR(avp, vsap, flags, cr, ct);
 509         VOP_RWUNLOCK(avp, 1, ct);
 510         return (error);
 511 }
 512 
 513 static int
 514 sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
 515     struct cred *cr, caller_context_t *ct)
 516 {
 517         int     error;
 518         struct sdev_node *dv = VTOSDEV(vp);
 519         struct vnode *avp = dv->sdev_attrvp;
 520 
 521         if (dv->sdev_state == SDEV_ZOMBIE)
 522                 return (0);
 523 
 524         if (avp == NULL) {
 525                 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv))
 526                         return (fs_nosys());
 527                 ASSERT(dv->sdev_attr);
 528                 /*
 529                  * if coming in directly, the acl system call will
 530                  * have held the read-write lock via VOP_RWLOCK()
 531                  * If coming in via specfs, specfs will have
 532                  * held the rw lock on the realvp i.e. us.
 533                  */
 534                 ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
 535                 sdev_vattr_merge(dv, dv->sdev_attr);
 536                 error = sdev_shadow_node(dv, cr);
 537                 if (error) {
 538                         return (fs_nosys());
 539                 }
 540 
 541                 ASSERT(dv->sdev_attrvp);
 542                 /* clean out the memory copy if any */
 543                 if (dv->sdev_attr) {
 544                         kmem_free(dv->sdev_attr, sizeof (struct vattr));
 545                         dv->sdev_attr = NULL;
 546                 }
 547                 avp = dv->sdev_attrvp;
 548         }
 549         ASSERT(avp);
 550 
 551         (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, ct);
 552         error = VOP_SETSECATTR(avp, vsap, flags, cr, ct);
 553         VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, ct);
 554         return (error);
 555 }
 556 
 557 /*
 558  * There are two different unlocked routines. This one is not static as it is
 559  * used as part of the secpolicy_vnode_setattr calls in sdev_subr.c. Because it
 560  * is used in that function it has to have a specific signature.
 561  */
 562 int
 563 sdev_unlocked_access(void *vdv, int mode, struct cred *cr)
 564 {
 565         struct sdev_node        *dv = vdv;
 566         int                     shift = 0;
 567         uid_t                   owner = dv->sdev_attr->va_uid;
 568 
 569         if (crgetuid(cr) != owner) {
 570                 shift += 3;
 571                 if (groupmember(dv->sdev_attr->va_gid, cr) == 0)
 572                         shift += 3;
 573         }
 574 
 575         return (secpolicy_vnode_access2(cr, SDEVTOV(dv), owner,
 576             dv->sdev_attr->va_mode << shift, mode));
 577 }
 578 
 579 static int
 580 sdev_self_access(sdev_node_t *dv, int mode, int flags, struct cred *cr,
 581     caller_context_t *ct)
 582 {
 583         int ret;
 584 
 585         ASSERT(dv->sdev_attr || dv->sdev_attrvp);
 586         if (dv->sdev_attrvp) {
 587                 ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr, ct);
 588         } else if (dv->sdev_attr) {
 589                 ret = sdev_unlocked_access(dv, mode, cr);
 590                 if (ret)
 591                         ret = EACCES;
 592         }
 593 
 594         return (ret);
 595 }
 596 
 597 static int
 598 sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr,
 599     caller_context_t *ct)
 600 {
 601         struct sdev_node *dv = VTOSDEV(vp);
 602         int ret;
 603 
 604         rw_enter(&dv->sdev_contents, RW_READER);
 605         ret = sdev_self_access(dv, mode, flags, cr, ct);
 606         rw_exit(&dv->sdev_contents);
 607 
 608         return (ret);
 609 }
 610 
 611 /*
 612  * Lookup
 613  */
 614 /*ARGSUSED3*/
 615 static int
 616 sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
 617     struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
 618     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
 619 {
 620         struct sdev_node *parent;
 621         int error;
 622 
 623         parent = VTOSDEV(dvp);
 624         ASSERT(parent);
 625 
 626         /* execute access is required to search the directory */
 627         if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
 628                 return (error);
 629 
 630         if (!SDEV_IS_GLOBAL(parent))
 631                 return (prof_lookup(dvp, nm, vpp, cred));
 632         return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0));
 633 }
 634 
 635 /*ARGSUSED2*/
 636 static int
 637 sdev_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
 638     int mode, struct vnode **vpp, struct cred *cred, int flag,
 639     caller_context_t *ct, vsecattr_t *vsecp)
 640 {
 641         struct vnode            *vp = NULL;
 642         struct vnode            *avp;
 643         struct sdev_node        *parent;
 644         struct sdev_node        *self = NULL;
 645         int                     error = 0;
 646         vtype_t                 type = vap->va_type;
 647 
 648         ASSERT(type != VNON && type != VBAD);
 649 
 650         if ((type == VFIFO) || (type == VSOCK) ||
 651             (type == VPROC) || (type == VPORT))
 652                 return (ENOTSUP);
 653 
 654         parent = VTOSDEV(dvp);
 655         ASSERT(parent);
 656 
 657         rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
 658         if (parent->sdev_state == SDEV_ZOMBIE) {
 659                 rw_exit(&parent->sdev_dotdot->sdev_contents);
 660                 return (ENOENT);
 661         }
 662 
 663         /* non-global do not allow pure node creation */
 664         if (!SDEV_IS_GLOBAL(parent)) {
 665                 rw_exit(&parent->sdev_dotdot->sdev_contents);
 666                 return (prof_lookup(dvp, nm, vpp, cred));
 667         }
 668         rw_exit(&parent->sdev_dotdot->sdev_contents);
 669 
 670         /* execute access is required to search the directory */
 671         if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
 672                 return (error);
 673 
 674         /* check existing name */
 675 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
 676         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
 677 
 678         /* name found */
 679         if (error == 0) {
 680                 ASSERT(vp);
 681                 if (excl == EXCL) {
 682                         error = EEXIST;
 683                 } else if ((vp->v_type == VDIR) && (mode & VWRITE)) {
 684                         /* allowing create/read-only an existing directory */
 685                         error = EISDIR;
 686                 } else {
 687                         error = VOP_ACCESS(vp, mode, 0, cred, ct);
 688                 }
 689 
 690                 if (error) {
 691                         VN_RELE(vp);
 692                         return (error);
 693                 }
 694 
 695                 /* truncation first */
 696                 if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
 697                     (vap->va_size == 0)) {
 698                         ASSERT(parent->sdev_attrvp);
 699                         error = VOP_CREATE(parent->sdev_attrvp,
 700                             nm, vap, excl, mode, &avp, cred, flag, ct, vsecp);
 701 
 702                         if (error) {
 703                                 VN_RELE(vp);
 704                                 return (error);
 705                         }
 706                 }
 707 
 708                 sdev_update_timestamps(vp, kcred,
 709                     AT_CTIME|AT_MTIME|AT_ATIME);
 710                 *vpp = vp;
 711                 return (0);
 712         }
 713 
 714         /* bail out early */
 715         if (error != ENOENT)
 716                 return (error);
 717 
 718         /* verify write access - compliance specifies ENXIO */
 719         if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) {
 720                 if (error == EACCES)
 721                         error = ENXIO;
 722                 return (error);
 723         }
 724 
 725         /*
 726          * For memory-based (ROFS) directory:
 727          *      - either disallow node creation;
 728          *      - or implement VOP_CREATE of its own
 729          */
 730         rw_enter(&parent->sdev_contents, RW_WRITER);
 731         if (!SDEV_IS_PERSIST(parent)) {
 732                 rw_exit(&parent->sdev_contents);
 733                 return (ENOTSUP);
 734         }
 735         ASSERT(parent->sdev_attrvp);
 736         error = sdev_mknode(parent, nm, &self, vap, NULL, NULL,
 737             cred, SDEV_READY);
 738         if (error) {
 739                 rw_exit(&parent->sdev_contents);
 740                 if (self)
 741                         SDEV_RELE(self);
 742                 return (error);
 743         }
 744         rw_exit(&parent->sdev_contents);
 745 
 746         ASSERT(self);
 747         /* take care the timestamps for the node and its parent */
 748         sdev_update_timestamps(SDEVTOV(self), kcred,
 749             AT_CTIME|AT_MTIME|AT_ATIME);
 750         sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
 751         if (SDEV_IS_GLOBAL(parent))
 752                 atomic_inc_ulong(&parent->sdev_gdir_gen);
 753 
 754         /* wake up other threads blocked on looking up this node */
 755         mutex_enter(&self->sdev_lookup_lock);
 756         SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP);
 757         mutex_exit(&self->sdev_lookup_lock);
 758         error = sdev_to_vp(self, vpp);
 759         return (error);
 760 }
 761 
 762 static int
 763 sdev_remove(struct vnode *dvp, char *nm, struct cred *cred,
 764     caller_context_t *ct, int flags)
 765 {
 766         int     error;
 767         struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
 768         struct vnode *vp = NULL;
 769         struct sdev_node *dv = NULL;
 770         int len;
 771         int bkstore;
 772 
 773         /* bail out early */
 774         len = strlen(nm);
 775         if (nm[0] == '.') {
 776                 if (len == 1) {
 777                         return (EINVAL);
 778                 } else if (len == 2 && nm[1] == '.') {
 779                         return (EEXIST);
 780                 }
 781         }
 782 
 783         ASSERT(parent);
 784         rw_enter(&parent->sdev_contents, RW_READER);
 785         if (!SDEV_IS_GLOBAL(parent)) {
 786                 rw_exit(&parent->sdev_contents);
 787                 return (ENOTSUP);
 788         }
 789 
 790         /* execute access is required to search the directory */
 791         if ((error = sdev_self_access(parent, VEXEC, 0, cred, ct)) != 0) {
 792                 rw_exit(&parent->sdev_contents);
 793                 return (error);
 794         }
 795 
 796         /* check existence first */
 797         dv = sdev_cache_lookup(parent, nm);
 798         if (dv == NULL) {
 799                 rw_exit(&parent->sdev_contents);
 800                 return (ENOENT);
 801         }
 802 
 803         vp = SDEVTOV(dv);
 804         if ((dv->sdev_state == SDEV_INIT) ||
 805             (dv->sdev_state == SDEV_ZOMBIE)) {
 806                 rw_exit(&parent->sdev_contents);
 807                 VN_RELE(vp);
 808                 return (ENOENT);
 809         }
 810 
 811         /* write access is required to remove an entry */
 812         if ((error = sdev_self_access(parent, VWRITE, 0, cred, ct)) != 0) {
 813                 rw_exit(&parent->sdev_contents);
 814                 VN_RELE(vp);
 815                 return (error);
 816         }
 817 
 818         bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
 819         if (!rw_tryupgrade(&parent->sdev_contents)) {
 820                 rw_exit(&parent->sdev_contents);
 821                 rw_enter(&parent->sdev_contents, RW_WRITER);
 822                 /* Make sure we didn't become a zombie */
 823                 if (parent->sdev_state == SDEV_ZOMBIE) {
 824                         rw_exit(&parent->sdev_contents);
 825                         VN_RELE(vp);
 826                         return (ENOENT);
 827                 }
 828         }
 829 
 830         /* we do not support unlinking a non-empty directory */
 831         if (vp->v_type == VDIR && dv->sdev_nlink > 2) {
 832                 rw_exit(&parent->sdev_contents);
 833                 VN_RELE(vp);
 834                 return (EBUSY);
 835         }
 836 
 837         /*
 838          * sdev_dirdelete does the real job of:
 839          *  - make sure no open ref count
 840          *  - destroying the sdev_node
 841          *  - releasing the hold on attrvp
 842          */
 843         sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE);
 844         VN_RELE(vp);
 845         rw_exit(&parent->sdev_contents);
 846 
 847         /*
 848          * best efforts clean up the backing store
 849          */
 850         if (bkstore) {
 851                 ASSERT(parent->sdev_attrvp);
 852                 error = VOP_REMOVE(parent->sdev_attrvp, nm, cred,
 853                     ct, flags);
 854                 /*
 855                  * do not report BUSY error
 856                  * because the backing store ref count is released
 857                  * when the last ref count on the sdev_node is
 858                  * released.
 859                  */
 860                 if (error == EBUSY) {
 861                         sdcmn_err2(("sdev_remove: device %s is still on"
 862                             "disk %s\n", nm, parent->sdev_path));
 863                         error = 0;
 864                 }
 865         }
 866 
 867         if (error == 0)
 868                 i_ddi_di_cache_invalidate();
 869 
 870         return (error);
 871 }
 872 
 873 /*
 874  * Some restrictions for this file system:
 875  *  - both oldnm and newnm are in the scope of /dev file system,
 876  *    to simply the namespace management model.
 877  */
 878 /*ARGSUSED6*/
 879 static int
 880 sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,
 881     struct cred *cred, caller_context_t *ct, int flags)
 882 {
 883         struct sdev_node        *fromparent = NULL;
 884         struct vattr            vattr;
 885         struct sdev_node        *toparent;
 886         struct sdev_node        *fromdv = NULL; /* source node */
 887         struct vnode            *ovp = NULL;    /* source vnode */
 888         struct sdev_node        *todv = NULL;   /* destination node */
 889         struct vnode            *nvp = NULL;    /* destination vnode */
 890         int                     samedir = 0;    /* set if odvp == ndvp */
 891         struct vnode            *realvp;
 892         int error = 0;
 893         dev_t fsid;
 894         int bkstore = 0;
 895         vtype_t type;
 896 
 897         /* prevent modifying "." and ".." */
 898         if ((onm[0] == '.' &&
 899             (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
 900             (nnm[0] == '.' &&
 901             (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0')))) {
 902                 return (EINVAL);
 903         }
 904 
 905         fromparent = VTOSDEV(odvp);
 906         toparent = VTOSDEV(ndvp);
 907 
 908         /* ZOMBIE parent doesn't allow new node creation */
 909         rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER);
 910         if (fromparent->sdev_state == SDEV_ZOMBIE) {
 911                 rw_exit(&fromparent->sdev_dotdot->sdev_contents);
 912                 return (ENOENT);
 913         }
 914 
 915         /* renaming only supported for global device nodes */
 916         if (!SDEV_IS_GLOBAL(fromparent)) {
 917                 rw_exit(&fromparent->sdev_dotdot->sdev_contents);
 918                 return (ENOTSUP);
 919         }
 920         rw_exit(&fromparent->sdev_dotdot->sdev_contents);
 921 
 922         rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER);
 923         if (toparent->sdev_state == SDEV_ZOMBIE) {
 924                 rw_exit(&toparent->sdev_dotdot->sdev_contents);
 925                 return (ENOENT);
 926         }
 927         rw_exit(&toparent->sdev_dotdot->sdev_contents);
 928 
 929         /*
 930          * acquire the global lock to prevent
 931          * mount/unmount/other rename activities.
 932          */
 933         mutex_enter(&sdev_lock);
 934 
 935         /* check existence of the source node */
 936 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
 937         error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred, ct,
 938             NULL, NULL);
 939         if (error) {
 940                 sdcmn_err2(("sdev_rename: the source node %s exists\n",
 941                     onm));
 942                 mutex_exit(&sdev_lock);
 943                 return (error);
 944         }
 945 
 946         if (VOP_REALVP(ovp, &realvp, ct) == 0) {
 947                 VN_HOLD(realvp);
 948                 VN_RELE(ovp);
 949                 ovp = realvp;
 950         }
 951 
 952         /* check existence of destination */
 953 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
 954         error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred, ct,
 955             NULL, NULL);
 956         if (error && (error != ENOENT)) {
 957                 mutex_exit(&sdev_lock);
 958                 VN_RELE(ovp);
 959                 return (error);
 960         }
 961 
 962         if (nvp && (VOP_REALVP(nvp, &realvp, ct) == 0)) {
 963                 VN_HOLD(realvp);
 964                 VN_RELE(nvp);
 965                 nvp = realvp;
 966         }
 967 
 968         /*
 969          * make sure the source and the destination are
 970          * in the same dev filesystem
 971          */
 972         if (odvp != ndvp) {
 973                 vattr.va_mask = AT_FSID;
 974                 if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) {
 975                         mutex_exit(&sdev_lock);
 976                         VN_RELE(ovp);
 977                         if (nvp != NULL)
 978                                 VN_RELE(nvp);
 979                         return (error);
 980                 }
 981                 fsid = vattr.va_fsid;
 982                 vattr.va_mask = AT_FSID;
 983                 if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) {
 984                         mutex_exit(&sdev_lock);
 985                         VN_RELE(ovp);
 986                         if (nvp != NULL)
 987                                 VN_RELE(nvp);
 988                         return (error);
 989                 }
 990                 if (fsid != vattr.va_fsid) {
 991                         mutex_exit(&sdev_lock);
 992                         VN_RELE(ovp);
 993                         if (nvp != NULL)
 994                                 VN_RELE(nvp);
 995                         return (EXDEV);
 996                 }
 997         }
 998 
 999         /* make sure the old entry can be deleted */
1000         error = VOP_ACCESS(odvp, VWRITE, 0, cred, ct);
1001         if (error) {
1002                 mutex_exit(&sdev_lock);
1003                 VN_RELE(ovp);
1004                 if (nvp != NULL)
1005                         VN_RELE(nvp);
1006                 return (error);
1007         }
1008 
1009         /* make sure the destination allows creation */
1010         samedir = (fromparent == toparent);
1011         if (!samedir) {
1012                 error = VOP_ACCESS(ndvp, VEXEC|VWRITE, 0, cred, ct);
1013                 if (error) {
1014                         mutex_exit(&sdev_lock);
1015                         VN_RELE(ovp);
1016                         if (nvp != NULL)
1017                                 VN_RELE(nvp);
1018                         return (error);
1019                 }
1020         }
1021 
1022         fromdv = VTOSDEV(ovp);
1023         ASSERT(fromdv);
1024 
1025         /* destination file exists */
1026         if (nvp != NULL) {
1027                 todv = VTOSDEV(nvp);
1028                 ASSERT(todv);
1029         }
1030 
1031         if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 ||
1032             (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) {
1033                 mutex_exit(&sdev_lock);
1034                 if (nvp != NULL)
1035                         VN_RELE(nvp);
1036                 VN_RELE(ovp);
1037                 return (EACCES);
1038         }
1039 
1040         /*
1041          * link source to new target in the memory. Regardless of failure, we
1042          * must rele our hold on nvp.
1043          */
1044         error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred);
1045         if (nvp != NULL)
1046                 VN_RELE(nvp);
1047         if (error) {
1048                 sdcmn_err2(("sdev_rename: renaming %s to %s failed "
1049                     " with error %d\n", onm, nnm, error));
1050                 mutex_exit(&sdev_lock);
1051                 VN_RELE(ovp);
1052                 return (error);
1053         }
1054 
1055         /*
1056          * unlink from source
1057          */
1058         rw_enter(&fromparent->sdev_contents, RW_READER);
1059         fromdv = sdev_cache_lookup(fromparent, onm);
1060         if (fromdv == NULL) {
1061                 rw_exit(&fromparent->sdev_contents);
1062                 mutex_exit(&sdev_lock);
1063                 VN_RELE(ovp);
1064                 sdcmn_err2(("sdev_rename: the source is deleted already\n"));
1065                 return (0);
1066         }
1067 
1068         if (fromdv->sdev_state == SDEV_ZOMBIE) {
1069                 rw_exit(&fromparent->sdev_contents);
1070                 mutex_exit(&sdev_lock);
1071                 VN_RELE(SDEVTOV(fromdv));
1072                 VN_RELE(ovp);
1073                 sdcmn_err2(("sdev_rename: the source is being deleted\n"));
1074                 return (0);
1075         }
1076         rw_exit(&fromparent->sdev_contents);
1077         ASSERT(SDEVTOV(fromdv) == ovp);
1078         VN_RELE(ovp);
1079 
1080         /* clean out the directory contents before it can be removed */
1081         type = SDEVTOV(fromdv)->v_type;
1082         if (type == VDIR) {
1083                 error = sdev_cleandir(fromdv, NULL, 0);
1084                 sdcmn_err2(("sdev_rename: cleandir finished with %d\n",
1085                     error));
1086                 if (error == EBUSY)
1087                         error = 0;
1088         }
1089 
1090         rw_enter(&fromparent->sdev_contents, RW_WRITER);
1091         bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0;
1092         sdev_cache_update(fromparent, &fromdv, onm,
1093             SDEV_CACHE_DELETE);
1094         VN_RELE(SDEVTOV(fromdv));
1095 
1096         /* best effforts clean up the backing store */
1097         if (bkstore) {
1098                 ASSERT(fromparent->sdev_attrvp);
1099                 if (type != VDIR) {
1100 /* XXXci - We may need to translate the C-I flags on VOP_REMOVE */
1101                         error = VOP_REMOVE(fromparent->sdev_attrvp,
1102                             onm, kcred, ct, 0);
1103                 } else {
1104 /* XXXci - We may need to translate the C-I flags on VOP_RMDIR */
1105                         error = VOP_RMDIR(fromparent->sdev_attrvp,
1106                             onm, fromparent->sdev_attrvp, kcred, ct, 0);
1107                 }
1108 
1109                 if (error) {
1110                         sdcmn_err2(("sdev_rename: device %s is "
1111                             "still on disk %s\n", onm,
1112                             fromparent->sdev_path));
1113                         error = 0;
1114                 }
1115         }
1116         rw_exit(&fromparent->sdev_contents);
1117         mutex_exit(&sdev_lock);
1118 
1119         /* once reached to this point, the rename is regarded successful */
1120         return (0);
1121 }
1122 
1123 /*
1124  * dev-fs version of "ln -s path dev-name"
1125  *      tnm - path, e.g. /devices/... or /dev/...
1126  *      lnm - dev_name
1127  */
1128 /*ARGSUSED6*/
1129 static int
1130 sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva,
1131     char *tnm, struct cred *cred, caller_context_t *ct, int flags)
1132 {
1133         int error;
1134         struct vnode *vp = NULL;
1135         struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
1136         struct sdev_node *self = (struct sdev_node *)NULL;
1137 
1138         ASSERT(parent);
1139         rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
1140         if (parent->sdev_state == SDEV_ZOMBIE) {
1141                 rw_exit(&parent->sdev_dotdot->sdev_contents);
1142                 sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n",
1143                     parent->sdev_name));
1144                 return (ENOENT);
1145         }
1146 
1147         if (!SDEV_IS_GLOBAL(parent)) {
1148                 rw_exit(&parent->sdev_dotdot->sdev_contents);
1149                 return (ENOTSUP);
1150         }
1151         rw_exit(&parent->sdev_dotdot->sdev_contents);
1152 
1153         /* execute access is required to search a directory */
1154         if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
1155                 return (error);
1156 
1157         /* find existing name */
1158 /* XXXci - We may need to translate the C-I flags here */
1159         error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
1160         if (error == 0) {
1161                 ASSERT(vp);
1162                 VN_RELE(vp);
1163                 sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm));
1164                 return (EEXIST);
1165         }
1166         if (error != ENOENT)
1167                 return (error);
1168 
1169         /* write access is required to create a symlink */
1170         if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0)
1171                 return (error);
1172 
1173         /* put it into memory cache */
1174         rw_enter(&parent->sdev_contents, RW_WRITER);
1175         error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm,
1176             cred, SDEV_READY);
1177         if (error) {
1178                 rw_exit(&parent->sdev_contents);
1179                 sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm));
1180                 if (self)
1181                         SDEV_RELE(self);
1182 
1183                 return (error);
1184         }
1185         ASSERT(self && (self->sdev_state == SDEV_READY));
1186         rw_exit(&parent->sdev_contents);
1187 
1188         /* take care the timestamps for the node and its parent */
1189         sdev_update_timestamps(SDEVTOV(self), kcred,
1190             AT_CTIME|AT_MTIME|AT_ATIME);
1191         sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
1192         if (SDEV_IS_GLOBAL(parent))
1193                 atomic_inc_ulong(&parent->sdev_gdir_gen);
1194         i_ddi_di_cache_invalidate();
1195 
1196         /* wake up other threads blocked on looking up this node */
1197         mutex_enter(&self->sdev_lookup_lock);
1198         SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP);
1199         mutex_exit(&self->sdev_lookup_lock);
1200         SDEV_RELE(self);        /* don't return with vnode held */
1201         return (0);
1202 }
1203 
1204 /*ARGSUSED6*/
1205 static int
1206 sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp,
1207     struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp)
1208 {
1209         int error;
1210         struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
1211         struct sdev_node *self = NULL;
1212         struct vnode    *vp = NULL;
1213 
1214         ASSERT(parent && parent->sdev_dotdot);
1215         rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
1216         if (parent->sdev_state == SDEV_ZOMBIE) {
1217                 rw_exit(&parent->sdev_dotdot->sdev_contents);
1218                 return (ENOENT);
1219         }
1220 
1221         /* non-global do not allow pure directory creation */
1222         if (!SDEV_IS_GLOBAL(parent)) {
1223                 rw_exit(&parent->sdev_dotdot->sdev_contents);
1224                 return (prof_lookup(dvp, nm, vpp, cred));
1225         }
1226         rw_exit(&parent->sdev_dotdot->sdev_contents);
1227 
1228         /* execute access is required to search the directory */
1229         if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) {
1230                 return (error);
1231         }
1232 
1233         /* find existing name */
1234 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
1235         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
1236         if (error == 0) {
1237                 VN_RELE(vp);
1238                 return (EEXIST);
1239         }
1240         if (error != ENOENT)
1241                 return (error);
1242 
1243         /* require write access to create a directory */
1244         if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) {
1245                 return (error);
1246         }
1247 
1248         /* put it into memory */
1249         rw_enter(&parent->sdev_contents, RW_WRITER);
1250         error = sdev_mknode(parent, nm, &self,
1251             va, NULL, NULL, cred, SDEV_READY);
1252         if (error) {
1253                 rw_exit(&parent->sdev_contents);
1254                 if (self)
1255                         SDEV_RELE(self);
1256                 return (error);
1257         }
1258         ASSERT(self && (self->sdev_state == SDEV_READY));
1259         rw_exit(&parent->sdev_contents);
1260 
1261         /* take care the timestamps for the node and its parent */
1262         sdev_update_timestamps(SDEVTOV(self), kcred,
1263             AT_CTIME|AT_MTIME|AT_ATIME);
1264         sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
1265         if (SDEV_IS_GLOBAL(parent))
1266                 atomic_inc_ulong(&parent->sdev_gdir_gen);
1267         i_ddi_di_cache_invalidate();
1268 
1269         /* wake up other threads blocked on looking up this node */
1270         mutex_enter(&self->sdev_lookup_lock);
1271         SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP);
1272         mutex_exit(&self->sdev_lookup_lock);
1273         *vpp = SDEVTOV(self);
1274         return (0);
1275 }
1276 
1277 /*
1278  * allowing removing an empty directory under /dev
1279  */
1280 /*ARGSUSED*/
1281 static int
1282 sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred,
1283     caller_context_t *ct, int flags)
1284 {
1285         int error = 0;
1286         struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
1287         struct sdev_node *self = NULL;
1288         struct vnode *vp = NULL;
1289 
1290         /* bail out early */
1291         if (strcmp(nm, ".") == 0)
1292                 return (EINVAL);
1293         if (strcmp(nm, "..") == 0)
1294                 return (EEXIST); /* should be ENOTEMPTY */
1295 
1296         /* no destruction of non-global node */
1297         ASSERT(parent && parent->sdev_dotdot);
1298         rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
1299         if (!SDEV_IS_GLOBAL(parent)) {
1300                 rw_exit(&parent->sdev_dotdot->sdev_contents);
1301                 return (ENOTSUP);
1302         }
1303         rw_exit(&parent->sdev_dotdot->sdev_contents);
1304 
1305         /* execute access is required to search the directory */
1306         if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0)
1307                 return (error);
1308 
1309         /* check existing name */
1310         rw_enter(&parent->sdev_contents, RW_WRITER);
1311         self = sdev_cache_lookup(parent, nm);
1312         if (self == NULL) {
1313                 rw_exit(&parent->sdev_contents);
1314                 return (ENOENT);
1315         }
1316 
1317         vp = SDEVTOV(self);
1318         if ((self->sdev_state == SDEV_INIT) ||
1319             (self->sdev_state == SDEV_ZOMBIE)) {
1320                 rw_exit(&parent->sdev_contents);
1321                 VN_RELE(vp);
1322                 return (ENOENT);
1323         }
1324 
1325         /* some sanity checks */
1326         if (vp == dvp || vp == cdir) {
1327                 rw_exit(&parent->sdev_contents);
1328                 VN_RELE(vp);
1329                 return (EINVAL);
1330         }
1331 
1332         if (vp->v_type != VDIR) {
1333                 rw_exit(&parent->sdev_contents);
1334                 VN_RELE(vp);
1335                 return (ENOTDIR);
1336         }
1337 
1338         if (vn_vfswlock(vp)) {
1339                 rw_exit(&parent->sdev_contents);
1340                 VN_RELE(vp);
1341                 return (EBUSY);
1342         }
1343 
1344         if (vn_mountedvfs(vp) != NULL) {
1345                 rw_exit(&parent->sdev_contents);
1346                 vn_vfsunlock(vp);
1347                 VN_RELE(vp);
1348                 return (EBUSY);
1349         }
1350 
1351         self = VTOSDEV(vp);
1352         /* bail out on a non-empty directory */
1353         rw_enter(&self->sdev_contents, RW_READER);
1354         if (self->sdev_nlink > 2) {
1355                 rw_exit(&self->sdev_contents);
1356                 rw_exit(&parent->sdev_contents);
1357                 vn_vfsunlock(vp);
1358                 VN_RELE(vp);
1359                 return (ENOTEMPTY);
1360         }
1361         rw_exit(&self->sdev_contents);
1362 
1363         /* unlink it from the directory cache */
1364         sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE);
1365         rw_exit(&parent->sdev_contents);
1366         vn_vfsunlock(vp);
1367         VN_RELE(vp);
1368 
1369         /* best effort to clean up the backing store */
1370         if (SDEV_IS_PERSIST(parent)) {
1371                 ASSERT(parent->sdev_attrvp);
1372                 error = VOP_RMDIR(parent->sdev_attrvp, nm,
1373                     parent->sdev_attrvp, kcred, ct, flags);
1374 
1375                 if (error)
1376                         sdcmn_err2(("sdev_rmdir: cleaning device %s is on"
1377                             " disk error %d\n", parent->sdev_path, error));
1378                 if (error == EBUSY)
1379                         error = 0;
1380 
1381         }
1382 
1383         if (error == 0)
1384                 i_ddi_di_cache_invalidate();
1385 
1386         return (error);
1387 }
1388 
1389 /*
1390  * read the contents of a symbolic link
1391  */
1392 static int
1393 sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred,
1394     caller_context_t *ct)
1395 {
1396         struct sdev_node *dv;
1397         int     error = 0;
1398 
1399         ASSERT(vp->v_type == VLNK);
1400 
1401         dv = VTOSDEV(vp);
1402 
1403         if (dv->sdev_attrvp) {
1404                 /* non-NULL attrvp implys a persisted node at READY state */
1405                 return (VOP_READLINK(dv->sdev_attrvp, uiop, cred, ct));
1406         } else if (dv->sdev_symlink != NULL) {
1407                 /* memory nodes, e.g. local nodes */
1408                 rw_enter(&dv->sdev_contents, RW_READER);
1409                 sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink));
1410                 error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink),
1411                     UIO_READ, uiop);
1412                 rw_exit(&dv->sdev_contents);
1413                 return (error);
1414         }
1415 
1416         return (ENOENT);
1417 }
1418 
1419 /*ARGSUSED4*/
1420 static int
1421 sdev_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp,
1422     caller_context_t *ct, int flags)
1423 {
1424         struct sdev_node *parent = VTOSDEV(dvp);
1425         int error;
1426 
1427         /*
1428          * We must check that we have execute access to search the directory --
1429          * but because our sdev_contents lock is already held as a reader (the
1430          * caller must have done a VOP_RWLOCK()), we call directly into the
1431          * underlying access routine if sdev_attr is non-NULL.
1432          */
1433         if (parent->sdev_attr != NULL) {
1434                 VERIFY(RW_READ_HELD(&parent->sdev_contents));
1435 
1436                 if (sdev_unlocked_access(parent, VEXEC, cred) != 0)
1437                         return (EACCES);
1438         } else {
1439                 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
1440                         return (error);
1441         }
1442 
1443         ASSERT(parent);
1444         if (!SDEV_IS_GLOBAL(parent))
1445                 prof_filldir(parent);
1446         return (devname_readdir_func(dvp, uiop, cred, eofp, SDEV_BROWSE));
1447 }
1448 
/*
 * VOP_INACTIVE entry point for sdev vnodes.
 *
 * Delegates all of the work to devname_inactive_func(), passing NULL as
 * its third argument.  The caller context `ct' is not used.
 */
/*ARGSUSED1*/
static void
sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
{
        devname_inactive_func(vp, cred, NULL);
}
1455 
1456 /*ARGSUSED2*/
1457 static int
1458 sdev_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1459 {
1460         struct sdev_node        *dv = VTOSDEV(vp);
1461         struct sdev_fid *sdev_fid;
1462 
1463         if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) {
1464                 fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t);
1465                 return (ENOSPC);
1466         }
1467 
1468         sdev_fid = (struct sdev_fid *)fidp;
1469         bzero(sdev_fid, sizeof (struct sdev_fid));
1470         sdev_fid->sdevfid_len =
1471             (int)sizeof (struct sdev_fid) - sizeof (ushort_t);
1472         sdev_fid->sdevfid_ino = dv->sdev_ino;
1473 
1474         return (0);
1475 }
1476 
1477 /*
1478  * This pair of routines bracket all VOP_READ, VOP_WRITE
1479  * and VOP_READDIR requests.  The contents lock stops things
1480  * moving around while we're looking at them.
1481  */
1482 /*ARGSUSED2*/
1483 static int
1484 sdev_rwlock(struct vnode *vp, int write_flag, caller_context_t *ctp)
1485 {
1486         rw_enter(&VTOSDEV(vp)->sdev_contents,
1487             write_flag ? RW_WRITER : RW_READER);
1488         return (write_flag ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE);
1489 }
1490 
/*
 * VOP_RWUNLOCK entry point: release the node's sdev_contents lock, the
 * same lock sdev_rwlock() acquires.  write_flag and ctp are unused;
 * rw_exit() is called the same way for reader and writer holds.
 */
/*ARGSUSED1*/
static void
sdev_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ctp)
{
        rw_exit(&VTOSDEV(vp)->sdev_contents);
}
1497 
1498 /*ARGSUSED1*/
1499 static int
1500 sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
1501     caller_context_t *ct)
1502 {
1503         struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp;
1504 
1505         ASSERT(vp->v_type != VCHR &&
1506             vp->v_type != VBLK && vp->v_type != VLNK);
1507 
1508         if (vp->v_type == VDIR)
1509                 return (fs_seek(vp, ooff, noffp, ct));
1510 
1511         ASSERT(attrvp);
1512         return (VOP_SEEK(attrvp, ooff, noffp, ct));
1513 }
1514 
1515 /*ARGSUSED1*/
1516 static int
1517 sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
1518     offset_t offset, struct flk_callback *flk_cbp, struct cred *cr,
1519     caller_context_t *ct)
1520 {
1521         int error;
1522         struct sdev_node *dv = VTOSDEV(vp);
1523 
1524         ASSERT(dv);
1525         ASSERT(dv->sdev_attrvp);
1526         error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset,
1527             flk_cbp, cr, ct);
1528 
1529         return (error);
1530 }
1531 
1532 static int
1533 sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
1534     caller_context_t *ct)
1535 {
1536         switch (cmd) {
1537         case _PC_ACL_ENABLED:
1538                 *valp = SDEV_ACL_FLAVOR(vp);
1539                 return (0);
1540         }
1541 
1542         return (fs_pathconf(vp, cmd, valp, cr, ct));
1543 }
1544 
/*
 * vnode operations vector for sdev.  It is only declared here;
 * presumably it is built from sdev_vnodeops_tbl elsewhere -- TODO confirm.
 */
vnodeops_t *sdev_vnodeops;

/*
 * Operation table for sdev vnodes: each entry pairs a VOPNAME_* key
 * with the sdev routine that implements it.  The list ends with a
 * NULL, NULL entry.
 */
const fs_operation_def_t sdev_vnodeops_tbl[] = {
        VOPNAME_OPEN,           { .vop_open = sdev_open },
        VOPNAME_CLOSE,          { .vop_close = sdev_close },
        VOPNAME_READ,           { .vop_read = sdev_read },
        VOPNAME_WRITE,          { .vop_write = sdev_write },
        VOPNAME_IOCTL,          { .vop_ioctl = sdev_ioctl },
        VOPNAME_GETATTR,        { .vop_getattr = sdev_getattr },
        VOPNAME_SETATTR,        { .vop_setattr = sdev_setattr },
        VOPNAME_ACCESS,         { .vop_access = sdev_access },
        VOPNAME_LOOKUP,         { .vop_lookup = sdev_lookup },
        VOPNAME_CREATE,         { .vop_create = sdev_create },
        VOPNAME_RENAME,         { .vop_rename = sdev_rename },
        VOPNAME_REMOVE,         { .vop_remove = sdev_remove },
        VOPNAME_MKDIR,          { .vop_mkdir = sdev_mkdir },
        VOPNAME_RMDIR,          { .vop_rmdir = sdev_rmdir },
        VOPNAME_READDIR,        { .vop_readdir = sdev_readdir },
        VOPNAME_SYMLINK,        { .vop_symlink = sdev_symlink },
        VOPNAME_READLINK,       { .vop_readlink = sdev_readlink },
        VOPNAME_INACTIVE,       { .vop_inactive = sdev_inactive },
        VOPNAME_FID,            { .vop_fid = sdev_fid },
        VOPNAME_RWLOCK,         { .vop_rwlock = sdev_rwlock },
        VOPNAME_RWUNLOCK,       { .vop_rwunlock = sdev_rwunlock },
        VOPNAME_SEEK,           { .vop_seek = sdev_seek },
        VOPNAME_FRLOCK,         { .vop_frlock = sdev_frlock },
        VOPNAME_PATHCONF,       { .vop_pathconf = sdev_pathconf },
        VOPNAME_SETSECATTR,     { .vop_setsecattr = sdev_setsecattr },
        VOPNAME_GETSECATTR,     { .vop_getsecattr = sdev_getsecattr },
        NULL,                   NULL
};

/* size in bytes of the whole table above, terminator included */
int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl);