Print this page
    
OS-2910 sdev_node_t can end up associated with wrong sdev_plugin_t
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/dev/sdev_subr.c
          +++ new/usr/src/uts/common/fs/dev/sdev_subr.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   * utility routines for the /dev fs
  28   28   */
  29   29  
  30   30  #include <sys/types.h>
  31   31  #include <sys/param.h>
  32   32  #include <sys/t_lock.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/sysmacros.h>
  35   35  #include <sys/user.h>
  36   36  #include <sys/time.h>
  37   37  #include <sys/vfs.h>
  38   38  #include <sys/vnode.h>
  39   39  #include <sys/file.h>
  40   40  #include <sys/fcntl.h>
  41   41  #include <sys/flock.h>
  42   42  #include <sys/kmem.h>
  43   43  #include <sys/uio.h>
  44   44  #include <sys/errno.h>
  45   45  #include <sys/stat.h>
  46   46  #include <sys/cred.h>
  47   47  #include <sys/dirent.h>
  48   48  #include <sys/pathname.h>
  49   49  #include <sys/cmn_err.h>
  50   50  #include <sys/debug.h>
  51   51  #include <sys/mode.h>
  52   52  #include <sys/policy.h>
  53   53  #include <fs/fs_subr.h>
  54   54  #include <sys/mount.h>
  55   55  #include <sys/fs/snode.h>
  56   56  #include <sys/fs/dv_node.h>
  57   57  #include <sys/fs/sdev_impl.h>
  58   58  #include <sys/sunndi.h>
  59   59  #include <sys/sunmdi.h>
  60   60  #include <sys/conf.h>
  61   61  #include <sys/proc.h>
  62   62  #include <sys/user.h>
  63   63  #include <sys/modctl.h>
  64   64  
  65   65  #ifdef DEBUG
           /*
            * Debug message mask.
            * NOTE(review): the consumers (sdcmn_err* macros) are defined outside
            * this file; confirm the meaning of individual bits there.
            */
  66   66  int sdev_debug = 0x00000001;
           /* cache flags passed to kmem_cache_create() by sdev_node_cache_init() */
  67   67  int sdev_debug_cache_flags = 0;
  68   68  #endif
  69   69  
  70   70  /*
  71   71   * globals
  72   72   */
  73   73  /*
            * prototype memory vattrs
            *
            * Template for directory nodes: va_mask flags type, mode, uid and gid
            * as the meaningful fields; every remaining field is zero.
            */
  74   74  vattr_t sdev_vattr_dir = {
  75   75          AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
  76   76          VDIR,                                   /* va_type */
  77   77          SDEV_DIRMODE_DEFAULT,                   /* va_mode */
  78   78          SDEV_UID_DEFAULT,                       /* va_uid */
  79   79          SDEV_GID_DEFAULT,                       /* va_gid */
  80   80          0,                                      /* va_fsid */
  81   81          0,                                      /* va_nodeid */
  82   82          0,                                      /* va_nlink */
  83   83          0,                                      /* va_size */
  84   84          0,                                      /* va_atime */
  85   85          0,                                      /* va_mtime */
  86   86          0,                                      /* va_ctime */
  87   87          0,                                      /* va_rdev */
  88   88          0,                                      /* va_blksize */
  89   89          0,                                      /* va_nblocks */
  90   90          0                                       /* va_vcode */
  91   91  };
  92   92  
           /*
            * Template for symlink nodes.  Only type and mode are flagged in
            * va_mask, although the uid/gid slots are still initialized to the
            * defaults.
            */
  93   93  vattr_t sdev_vattr_lnk = {
  94   94          AT_TYPE|AT_MODE,                        /* va_mask */
  95   95          VLNK,                                   /* va_type */
  96   96          SDEV_LNKMODE_DEFAULT,                   /* va_mode */
  97   97          SDEV_UID_DEFAULT,                       /* va_uid */
  98   98          SDEV_GID_DEFAULT,                       /* va_gid */
  99   99          0,                                      /* va_fsid */
 100  100          0,                                      /* va_nodeid */
 101  101          0,                                      /* va_nlink */
 102  102          0,                                      /* va_size */
 103  103          0,                                      /* va_atime */
 104  104          0,                                      /* va_mtime */
 105  105          0,                                      /* va_ctime */
 106  106          0,                                      /* va_rdev */
 107  107          0,                                      /* va_blksize */
 108  108          0,                                      /* va_nblocks */
 109  109          0                                       /* va_vcode */
 110  110  };
 111  111  
           /*
            * Template for block device nodes: the mode carries S_IFBLK plus the
            * default device permissions; va_mask flags type, mode, uid and gid.
            */
 112  112  vattr_t sdev_vattr_blk = {
 113  113          AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 114  114          VBLK,                                   /* va_type */
 115  115          S_IFBLK | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 116  116          SDEV_UID_DEFAULT,                       /* va_uid */
 117  117          SDEV_GID_DEFAULT,                       /* va_gid */
 118  118          0,                                      /* va_fsid */
 119  119          0,                                      /* va_nodeid */
 120  120          0,                                      /* va_nlink */
 121  121          0,                                      /* va_size */
 122  122          0,                                      /* va_atime */
 123  123          0,                                      /* va_mtime */
 124  124          0,                                      /* va_ctime */
 125  125          0,                                      /* va_rdev */
 126  126          0,                                      /* va_blksize */
 127  127          0,                                      /* va_nblocks */
 128  128          0                                       /* va_vcode */
 129  129  };
 130  130  
           /*
            * Template for character device nodes: the mode carries S_IFCHR plus
            * the default device permissions; va_mask flags type, mode, uid and gid.
            */
 131  131  vattr_t sdev_vattr_chr = {
 132  132          AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 133  133          VCHR,                                   /* va_type */
 134  134          S_IFCHR | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 135  135          SDEV_UID_DEFAULT,                       /* va_uid */
 136  136          SDEV_GID_DEFAULT,                       /* va_gid */
 137  137          0,                                      /* va_fsid */
 138  138          0,                                      /* va_nodeid */
 139  139          0,                                      /* va_nlink */
 140  140          0,                                      /* va_size */
 141  141          0,                                      /* va_atime */
 142  142          0,                                      /* va_mtime */
 143  143          0,                                      /* va_ctime */
 144  144          0,                                      /* va_rdev */
 145  145          0,                                      /* va_blksize */
 146  146          0,                                      /* va_nblocks */
 147  147          0                                       /* va_vcode */
 148  148  };
 149  149  
           /*
            * sdev_node_cache is created by sdev_node_cache_init() and destroyed
            * (and reset to NULL) by sdev_node_cache_fini().
            */
 150  150  kmem_cache_t    *sdev_node_cache;       /* sdev_node cache */
 151  151  int             devtype;                /* fstype */
 152  152  
 153  153  /* static */
           /* forward declarations of file-local helpers */
 154  154  static struct vnodeops *sdev_get_vop(struct sdev_node *);
 155  155  static void sdev_set_no_negcache(struct sdev_node *);
 156  156  static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
 157  157  static void sdev_free_vtab(fs_operation_def_t *);
 158  158  
           /*
            * Release the profile nvlists attached to a non-global sdev_node and
            * zero the whole sdev_prof structure so that no stale pointers are
            * left behind.  Asserts that the node is not a global one.
            */
 159  159  static void
 160  160  sdev_prof_free(struct sdev_node *dv)
 161  161  {
 162  162          ASSERT(!SDEV_IS_GLOBAL(dv));
 163  163          nvlist_free(dv->sdev_prof.dev_name);
 164  164          nvlist_free(dv->sdev_prof.dev_map);
 165  165          nvlist_free(dv->sdev_prof.dev_symlink);
 166  166          nvlist_free(dv->sdev_prof.dev_glob_incdir);
 167  167          nvlist_free(dv->sdev_prof.dev_glob_excdir);
 168  168          bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 169  169  }
 170  170  
 171  171  /*
            * sdev_node cache constructor
            *
            * Zeroes the buffer, allocates the companion vnode with vn_alloc(flag),
            * points the vnode's v_data back at the node and initializes the
            * sdev_contents rwlock.
            *
            * Returns 0 on success, or -1 when vn_alloc() returns NULL.
            */
 172  172  /*ARGSUSED1*/
 173  173  static int
 174  174  i_sdev_node_ctor(void *buf, void *cfarg, int flag)
 175  175  {
 176  176          struct sdev_node *dv = (struct sdev_node *)buf;
 177  177          struct vnode *vp;
 178  178  
 179  179          bzero(buf, sizeof (struct sdev_node));
 180  180          vp = dv->sdev_vnode = vn_alloc(flag);
 181  181          if (vp == NULL) {
 182  182                  return (-1);
 183  183          }
 184  184          vp->v_data = dv;
 185  185          rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
 186  186          return (0);
 187  187  }
 188  188  
 189  189  /*
            * sdev_node cache destructor
            *
            * Undoes i_sdev_node_ctor(): destroys the sdev_contents rwlock and
            * frees the vnode that was paired with the node.
            */
 190  190  /*ARGSUSED1*/
 191  191  static void
 192  192  i_sdev_node_dtor(void *buf, void *arg)
 193  193  {
 194  194          struct sdev_node *dv = (struct sdev_node *)buf;
 195  195          struct vnode *vp = SDEVTOV(dv);
 196  196  
 197  197          rw_destroy(&dv->sdev_contents);
 198  198          vn_free(vp);
 199  199  }
 200  200  
 201  201  /*
            * initialize sdev_node cache
            *
            * Creates "sdev_node_cache" with i_sdev_node_ctor/i_sdev_node_dtor as
            * constructor and destructor.  On DEBUG kernels the cache flags come
            * from sdev_debug_cache_flags; otherwise they are 0.  The cache must
            * not already exist (asserted).
            */
 202  202  void
 203  203  sdev_node_cache_init()
 204  204  {
 205  205          int flags = 0;
 206  206  
 207  207  #ifdef  DEBUG
 208  208          flags = sdev_debug_cache_flags;
 209  209          if (flags)
 210  210                  sdcmn_err(("cache debug flags 0x%x\n", flags));
 211  211  #endif  /* DEBUG */
 212  212  
 213  213          ASSERT(sdev_node_cache == NULL);
 214  214          sdev_node_cache = kmem_cache_create("sdev_node_cache",
 215  215              sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
 216  216              NULL, NULL, NULL, flags);
 217  217  }
 218  218  
 219  219  /*
            * destroy sdev_node cache
            *
            * The cache must exist (asserted).  The global pointer is reset to
            * NULL afterwards so that sdev_node_cache_init() may be called again.
            */
 220  220  void
 221  221  sdev_node_cache_fini()
 222  222  {
 223  223          ASSERT(sdev_node_cache != NULL);
 224  224          kmem_cache_destroy(sdev_node_cache);
 225  225          sdev_node_cache = NULL;
 226  226  }
 227  227  
 228  228  /*
 229  229   * Compare two nodes lexicographically to balance avl tree
            *
            * The ordering key is sdev_name.  The strcmp() result is normalized to
            * exactly -1, 0 or 1.  This function is registered as the comparator
            * in the avl_create() calls for sdev_entries.
 230  230   */
 231  231  static int
 232  232  sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
 233  233  {
 234  234          int rv;
 235  235          if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
 236  236                  return (0);
 237  237          return ((rv < 0) ? -1 : 1);
 238  238  }
 239  239  
           /*
            * Record a new lifecycle state in dv->sdev_state.  The caller must hold
            * dv->sdev_contents as writer (asserted).
            */
 240  240  void
 241  241  sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
 242  242  {
 243  243          ASSERT(dv);
 244  244          ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
 245  245          dv->sdev_state = state;
 246  246  }
 247  247  
           /*
            * Merge the attributes selected by vap->va_mask into the node's
            * in-memory attribute copy (dv->sdev_attr, which must already exist).
            *
            * Type, mode, uid, gid and rdev are copied only when their AT_* bit is
            * set.  The three timestamps are always rewritten: from vap when the
            * matching AT_*TIME bit is set, otherwise with the current time.
            */
 248  248  static void
 249  249  sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
 250  250  {
 251  251          timestruc_t     now;
 252  252          struct vattr    *attrp;
 253  253          uint_t          mask;
 254  254  
 255  255          ASSERT(dv->sdev_attr);
 256  256          ASSERT(vap);
 257  257  
 258  258          attrp = dv->sdev_attr;
 259  259          mask = vap->va_mask;
 260  260          if (mask & AT_TYPE)
 261  261                  attrp->va_type = vap->va_type;
 262  262          if (mask & AT_MODE)
 263  263                  attrp->va_mode = vap->va_mode;
 264  264          if (mask & AT_UID)
 265  265                  attrp->va_uid = vap->va_uid;
 266  266          if (mask & AT_GID)
 267  267                  attrp->va_gid = vap->va_gid;
 268  268          if (mask & AT_RDEV)
 269  269                  attrp->va_rdev = vap->va_rdev;
 270  270  
                   /* timestamps not supplied by the caller default to "now" */
 271  271          gethrestime(&now);
 272  272          attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
 273  273          attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
 274  274          attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
 275  275  }
 276  276  
           /*
            * Allocate a zeroed in-memory vattr for a node that has none yet and
            * seed it from vap via sdev_attr_update().  vap must flag at least
            * AT_TYPE and AT_MODE (asserted).
            */
 277  277  static void
 278  278  sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
 279  279  {
 280  280          ASSERT(dv->sdev_attr == NULL);
 281  281          ASSERT(vap->va_mask & AT_TYPE);
 282  282          ASSERT(vap->va_mask & AT_MODE);
 283  283  
 284  284          dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
 285  285          sdev_attr_update(dv, vap);
 286  286  }
 287  287  
 288  288  /*
            * alloc and initialize a sdev_node
            *
            * ddv   - parent directory node; supplies the path prefix, the vfs, the
            *         initial vnodeops and the initial flags.
            * nm    - name of the new node; including its terminating NUL it must
            *         not exceed MAXNAMELEN.
            * newdv - out: the new node, or NULL when the name is too long.
            * vap   - optional attributes; when non-NULL they set the vnode type
            *         and seed the in-memory attribute copy.
            *
            * Returns 0 on success or ENAMETOOLONG.  On success the node is left
            * in SDEV_INIT state with SDEV_BUILD set and a link count of 0.
            */
 289  289  int
 290  290  sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 291  291      vattr_t *vap)
 292  292  {
 293  293          struct sdev_node *dv = NULL;
 294  294          struct vnode *vp;
 295  295          size_t nmlen, len;
 296  296          devname_handle_t  *dhl;
 297  297  
 298  298          nmlen = strlen(nm) + 1;
 299  299          if (nmlen > MAXNAMELEN) {
 300  300                  sdcmn_err9(("sdev_nodeinit: node name %s"
 301  301                      " too long\n", nm));
 302  302                  *newdv = NULL;
 303  303                  return (ENAMETOOLONG);
 304  304          }
 305  305  
 306  306          dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 307  307  
 308  308          dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
 309  309          bcopy(nm, dv->sdev_name, nmlen);
 310  310          dv->sdev_namelen = nmlen - 1;   /* '\0' not included */
                   /* room for "<parent path>/<nm>": one '/' plus the NUL */
 311  311          len = strlen(ddv->sdev_path) + strlen(nm) + 2;
 312  312          dv->sdev_path = kmem_alloc(len, KM_SLEEP);
 313  313          (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
 314  314          /* overwritten for VLNK nodes */
 315  315          dv->sdev_symlink = NULL;
 316  316  
 317  317          vp = SDEVTOV(dv);
 318  318          vn_reinit(vp);
 319  319          vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
 320  320          if (vap)
 321  321                  vp->v_type = vap->va_type;
 322  322  
 323  323          /*
 324  324           * initialized to the parent's vnodeops.
 325  325           * maybe overwritten for a VDIR
 326  326           */
 327  327          vn_setops(vp, vn_getops(SDEVTOV(ddv)));
 328  328          vn_exists(vp);
 329  329  
 330  330          dv->sdev_dotdot = NULL;
 331  331          dv->sdev_attrvp = NULL;
 332  332          if (vap) {
 333  333                  sdev_attr_alloc(dv, vap);
 334  334          } else {
 335  335                  dv->sdev_attr = NULL;
 336  336          }
 337  337  
 338  338          dv->sdev_ino = sdev_mkino(dv);
 339  339          dv->sdev_nlink = 0;             /* updated on insert */
 340  340          dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
 341  341          dv->sdev_flags |= SDEV_BUILD;
 342  342          mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 343  343          cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
                   /* global and non-global nodes initialize different fields */
 344  344          if (SDEV_IS_GLOBAL(ddv)) {
 345  345                  dv->sdev_flags |= SDEV_GLOBAL;
 346  346                  dhl = &(dv->sdev_handle);
 347  347                  dhl->dh_data = dv;
 348  348                  dhl->dh_args = NULL;
 349  349                  sdev_set_no_negcache(dv);
 350  350                  dv->sdev_gdir_gen = 0;
 351  351          } else {
 352  352                  dv->sdev_flags &= ~SDEV_GLOBAL;
 353  353                  dv->sdev_origin = NULL; /* set later */
 354  354                  bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 355  355                  dv->sdev_ldir_gen = 0;
 356  356                  dv->sdev_devtree_gen = 0;
 357  357          }
 358  358  
                   /* sdev_set_nodestate() requires the contents lock as writer */
 359  359          rw_enter(&dv->sdev_contents, RW_WRITER);
 360  360          sdev_set_nodestate(dv, SDEV_INIT);
 361  361          rw_exit(&dv->sdev_contents);
 362  362          *newdv = dv;
 363  363  
 364  364          return (0);
 365  365  }
 366  366  
 367  367  /*
 368  368   * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
 369  369   * caller to transition the node to the SDEV_ZOMBIE state.
            *
            * dv   - node to make ready; must not already be SDEV_READY.
            * vap  - attributes; va_type and va_rdev are copied to the vnode.
            * avp  - optional attribute (shadow) vnode; stored in sdev_attrvp when
            *        non-NULL.
            * args - for VLNK, the link target string (duplicated into
            *        sdev_symlink); for non-global nodes it is also stored as
            *        sdev_origin.
            * cred - credentials handed to sdev_shadow_node().
            *
            * Returns 0, or the error from sdev_shadow_node(); on error the node
            * state is left unchanged.
 370  370   */
 371  371  int
 372  372  sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
 373  373      void *args, struct cred *cred)
 374  374  {
 375  375          int error = 0;
 376  376          struct vnode *vp = SDEVTOV(dv);
 377  377          vtype_t type;
 378  378  
 379  379          ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
 380  380  
 381  381          type = vap->va_type;
 382  382          vp->v_type = type;
 383  383          vp->v_rdev = vap->va_rdev;
 384  384          rw_enter(&dv->sdev_contents, RW_WRITER);
 385  385          if (type == VDIR) {
 386  386                  dv->sdev_nlink = 2;
                           /* sdev_get_vop() below may set SDEV_PERSIST again */
 387  387                  dv->sdev_flags &= ~SDEV_PERSIST;
 388  388                  dv->sdev_flags &= ~SDEV_DYNAMIC;
 389  389                  vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
 390  390                  ASSERT(dv->sdev_dotdot);
 391  391                  ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
                           /* a directory takes its rdev from its parent directory */
 392  392                  vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
 393  393                  avl_create(&dv->sdev_entries,
 394  394                      (int (*)(const void *, const void *))sdev_compare_nodes,
 395  395                      sizeof (struct sdev_node),
 396  396                      offsetof(struct sdev_node, sdev_avllink));
 397  397          } else if (type == VLNK) {
 398  398                  ASSERT(args);
 399  399                  dv->sdev_nlink = 1;
 400  400                  dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
 401  401          } else {
 402  402                  dv->sdev_nlink = 1;
 403  403          }
 404  404  
                   /*
                    * NOTE(review): for a non-global VLNK node, args is used both
                    * as the link target string above and as the origin node here;
                    * confirm what callers pass in that case.
                    */
 405  405          if (!(SDEV_IS_GLOBAL(dv))) {
 406  406                  dv->sdev_origin = (struct sdev_node *)args;
 407  407                  dv->sdev_flags &= ~SDEV_PERSIST;
 408  408          }
 409  409  
 410  410          /*
 411  411           * shadow node is created here OR
 412  412           * if failed (indicated by dv->sdev_attrvp == NULL),
 413  413           * created later in sdev_setattr
 414  414           */
 415  415          if (avp) {
 416  416                  dv->sdev_attrvp = avp;
 417  417          } else {
 418  418                  if (dv->sdev_attr == NULL) {
 419  419                          sdev_attr_alloc(dv, vap);
 420  420                  } else {
 421  421                          sdev_attr_update(dv, vap);
 422  422                  }
 423  423  
 424  424                  if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
 425  425                          error = sdev_shadow_node(dv, cred);
 426  426          }
 427  427  
 428  428          if (error == 0) {
 429  429                  /* transition to READY state */
 430  430                  sdev_set_nodestate(dv, SDEV_READY);
 431  431                  sdev_nc_node_exists(dv);
 432  432          }
 433  433          rw_exit(&dv->sdev_contents);
 434  434          return (error);
 435  435  }
 436  436  
 437  437  /*
 438  438   * Build the VROOT sdev_node.
            *
            * vfsp   - vfs being mounted; its mount point (when set) becomes the
            *          node name, otherwise "/dev" is used.
            * devdev - stored as the root vnode's v_rdev.
            * mvp    - not referenced in this function.
            * avp    - attribute vnode, stored in sdev_attrvp; must be non-NULL
            *          (asserted).
            * cred   - not referenced in this function.
            *
            * The root is its own parent, starts with a link count of 2, and is
            * returned already in SDEV_READY state.  It is flagged global and
            * persistent only when its name is exactly "/dev".
 439  439   */
 440  440  /*ARGSUSED*/
 441  441  struct sdev_node *
 442  442  sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
 443  443      struct vnode *avp, struct cred *cred)
 444  444  {
 445  445          struct sdev_node *dv;
 446  446          struct vnode *vp;
 447  447          char devdir[] = "/dev";
 448  448  
 449  449          ASSERT(sdev_node_cache != NULL);
 450  450          ASSERT(avp);
 451  451          dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 452  452          vp = SDEVTOV(dv);
 453  453          vn_reinit(vp);
 454  454          vp->v_flag |= VROOT;
 455  455          vp->v_vfsp = vfsp;
 456  456          vp->v_type = VDIR;
 457  457          vp->v_rdev = devdev;
 458  458          vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
 459  459          vn_exists(vp);
 460  460  
 461  461          if (vfsp->vfs_mntpt)
 462  462                  dv->sdev_name = i_ddi_strdup(
 463  463                      (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
 464  464          else
 465  465                  /* vfs_mountdev1 set mount point later */
 466  466                  dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
 467  467          dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
                   /* sdev_path is always "/dev", whatever the mount point name is */
 468  468          dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
 469  469          dv->sdev_ino = SDEV_ROOTINO;
 470  470          dv->sdev_nlink = 2;             /* name + . (no sdev_insert) */
 471  471          dv->sdev_dotdot = dv;           /* .. == self */
 472  472          dv->sdev_attrvp = avp;
 473  473          dv->sdev_attr = NULL;
 474  474          mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 475  475          cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 476  476          if (strcmp(dv->sdev_name, "/dev") == 0) {
 477  477                  dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
 478  478                  bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
 479  479                  dv->sdev_gdir_gen = 0;
 480  480          } else {
 481  481                  dv->sdev_flags = SDEV_BUILD;
                           /* no-op after the assignment above; kept as written */
 482  482                  dv->sdev_flags &= ~SDEV_PERSIST;
 483  483                  bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 484  484                  dv->sdev_ldir_gen = 0;
 485  485                  dv->sdev_devtree_gen = 0;
 486  486          }
 487  487  
 488  488          avl_create(&dv->sdev_entries,
 489  489              (int (*)(const void *, const void *))sdev_compare_nodes,
 490  490              sizeof (struct sdev_node),
 491  491              offsetof(struct sdev_node, sdev_avllink));
 492  492  
 493  493          rw_enter(&dv->sdev_contents, RW_WRITER);
 494  494          sdev_set_nodestate(dv, SDEV_READY);
 495  495          rw_exit(&dv->sdev_contents);
 496  496          sdev_nc_node_exists(dv);
 497  497          return (dv);
 498  498  }
 499  499  
 500  500  /*
            * directory dependent vop table
            *
            * One entry per specially handled path under /dev.  vt_vops starts
            * out NULL and is filled in by sdev_get_vop() the first time an entry
            * with a vt_service template is matched; the same pointer is then
            * stored through vt_global_vops when that is non-NULL.  vt_flags is
            * OR-ed into the sdev_flags of a matching node.
            */
 501  501  struct sdev_vop_table {
 502  502          char *vt_name;                          /* subdirectory name */
 503  503          const fs_operation_def_t *vt_service;   /* vnodeops table */
 504  504          struct vnodeops *vt_vops;               /* constructed vop */
 505  505          struct vnodeops **vt_global_vops;       /* global container for vop */
 506  506          int (*vt_vtor)(struct sdev_node *);     /* validate sdev_node */
 507  507          int vt_flags;
 508  508  };
 509  509  
 510  510  /*
 511  511   * A nice improvement would be to provide a plug-in mechanism
 512  512   * for this table instead of a const table.
            *
            * The table is terminated by an entry whose vt_name is NULL; the
            * lookups in sdev_match() and sdev_set_no_negcache() stop there.
            * Entries with SDEV_SUBDIR also match paths below the named directory.
 513  513   */
 514  514  static struct sdev_vop_table vtab[] =
 515  515  {
 516  516          { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
 517  517          SDEV_DYNAMIC | SDEV_VTOR },
 518  518  
 519  519          { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
 520  520          SDEV_DYNAMIC | SDEV_VTOR },
 521  521  
 522  522          { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
 523  523          devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
 524  524  
                   /* no private vnodeops: only turns off the negative cache */
 525  525          { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
 526  526  
 527  527          { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
 528  528          SDEV_DYNAMIC | SDEV_VTOR },
 529  529  
 530  530          { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
 531  531          devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
 532  532  
 533  533          /*
 534  534           * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
 535  535           * lofi driver controls child nodes.
 536  536           *
 537  537           * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
 538  538           * stale nodes (e.g. from devfsadm -R).
 539  539           *
 540  540           * In addition, devfsadm knows not to attempt a rmdir: a zone
 541  541           * may hold a reference, which would zombify the node,
 542  542           * preventing a mkdir.
 543  543           */
 544  544  
 545  545          { "lofi", NULL, NULL, NULL, NULL,
 546  546              SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 547  547          { "rlofi", NULL, NULL, NULL, NULL,
 548  548              SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 549  549  
 550  550          { NULL, NULL, NULL, NULL, NULL, 0}
 551  551  };
 552  552  
 553  553  /*
 554  554   * We need to match off of the sdev_path, not the sdev_name. We are only allowed
 555  555   * to exist directly under /dev.
            *
            * Returns the vtab entry for dv, or NULL when there is none.  The part
            * of sdev_path after "/dev/" must equal an entry's vt_name exactly;
            * for entries flagged SDEV_SUBDIR it may also be "<vt_name>/...".
            * Paths that are not longer than "/dev/" or do not start with it never
            * match.
 556  556   */
 557  557  struct sdev_vop_table *
 558  558  sdev_match(struct sdev_node *dv)
 559  559  {
 560  560          int vlen;
 561  561          int i;
 562  562          const char *path;
 563  563  
 564  564          if (strlen(dv->sdev_path) <= 5)
 565  565                  return (NULL);
 566  566  
 567  567          if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
 568  568                  return (NULL);
 569  569          path = dv->sdev_path + 5;
 570  570  
 571  571          for (i = 0; vtab[i].vt_name; i++) {
 572  572                  if (strcmp(vtab[i].vt_name, path) == 0)
 573  573                          return (&vtab[i]);
 574  574                  if (vtab[i].vt_flags & SDEV_SUBDIR) {
 575  575                          vlen = strlen(vtab[i].vt_name);
                                   /*
                                    * Compare the whole name, not vlen - 1 bytes:
                                    * a full match also proves that path holds at
                                    * least vlen characters, so path[vlen] is
                                    * never read past the terminating NUL.
                                    */
 576  576                          if ((strncmp(vtab[i].vt_name, path,
 577  577                              vlen) == 0) && path[vlen] == '/')
 578  578                                  return (&vtab[i]);
 579  579                  }
 580  580  
 581  581          }
 582  582          return (NULL);
 583  583  }
 584  584  
 585  585  /*
 586  586   *  sets a directory's vnodeops if the directory is in the vtab;
            *
            * Returns the vnodeops to install on directory dv.  When dv matches a
            * vtab entry, the entry's flags are OR-ed into dv->sdev_flags and the
            * entry's vnodeops are returned, building them from the vt_service
            * template on first use (panics if the template is malformed).
            * Entries without a template, and directories that match nothing,
            * get the default sdev_vnodeops.
            *
            * SDEV_PERSIST is inherited from the parent (dv->sdev_dotdot); for a
            * matched entry only if the node is already persistent or is not
            * dynamic.
 587  587   */
 588  588  static struct vnodeops *
 589  589  sdev_get_vop(struct sdev_node *dv)
 590  590  {
 591  591          struct sdev_vop_table *vtp;
 593  593  
 595  595          ASSERT(dv->sdev_path);
 599  599  
 600  600          /* gets the vtab entry it matches */
 601  601          if ((vtp = sdev_match(dv)) != NULL) {
 602  602                  dv->sdev_flags |= vtp->vt_flags;
 603  603                  if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
 604  604                      (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
 605  605                          dv->sdev_flags |= SDEV_PERSIST;
 606  606  
                           /* vnodeops already constructed by an earlier call */
 607  607                  if (vtp->vt_vops) {
 608  608                          if (vtp->vt_global_vops)
 609  609                                  *(vtp->vt_global_vops) = vtp->vt_vops;
 610  610  
 611  611                          return (vtp->vt_vops);
 612  612                  }
 613  613  
                           /* first use: build the vnodeops from the template */
 614  614                  if (vtp->vt_service) {
 615  615                          fs_operation_def_t *templ;
 616  616                          templ = sdev_merge_vtab(vtp->vt_service);
 617  617                          if (vn_make_ops(vtp->vt_name,
 618  618                              (const fs_operation_def_t *)templ,
 619  619                              &vtp->vt_vops) != 0) {
 620  620                                  cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
 621  621                                      vtp->vt_name);
 622  622                                  /*NOTREACHED*/
 623  623                          }
 624  624                          if (vtp->vt_global_vops) {
 625  625                                  *(vtp->vt_global_vops) = vtp->vt_vops;
 626  626                          }
 627  627                          sdev_free_vtab(templ);
 628  628  
 629  629                          return (vtp->vt_vops);
 630  630                  }
 631  631  
 632  632                  return (sdev_vnodeops);
 633  633          }
 634  634  
 635  635          /* child inherits the persistence of the parent */
 636  636          if (SDEV_IS_PERSIST(dv->sdev_dotdot))
 637  637                  dv->sdev_flags |= SDEV_PERSIST;
 638  638  
 639  639          return (sdev_vnodeops);
 640  640  }
 641  641  
           /*
            * Set SDEV_NO_NCACHE on dv when the vtab entry whose name exactly
            * equals the part of sdev_path after "/dev/" carries that flag.
            *
            * The "/dev/" prefix is skipped by length only; it is not checked.
            * NOTE(review): this presumes sdev_path is at least strlen("/dev/")
            * long - confirm for every caller.
            */
 642  642  static void
 643  643  sdev_set_no_negcache(struct sdev_node *dv)
 644  644  {
 645  645          int i;
 646  646          char *path;
 647  647  
 648  648          ASSERT(dv->sdev_path);
 649  649          path = dv->sdev_path + strlen("/dev/");
 650  650  
 651  651          for (i = 0; vtab[i].vt_name; i++) {
 652  652                  if (strcmp(vtab[i].vt_name, path) == 0) {
 653  653                          if (vtab[i].vt_flags & SDEV_NO_NCACHE)
 654  654                                  dv->sdev_flags |= SDEV_NO_NCACHE;
                                   /* the first exact name match decides */
 655  655                          break;
 656  656                  }
 657  657          }
 658  658  }
 659  659  
           /*
            * Return the validator callback (vt_vtor) of the vtab entry matching
            * dv, cast to void *, or NULL when dv matches no entry.  The result is
            * also NULL for a matching entry that has no validator.
            */
 660  660  void *
 661  661  sdev_get_vtor(struct sdev_node *dv)
 662  662  {
 663  663          struct sdev_vop_table *vtp;
 664  664  
 665  665          vtp = sdev_match(dv);
 666  666          if (vtp)
 667  667                  return ((void *)vtp->vt_vtor);
 668  668          else
 669  669                  return (NULL);
 670  670  }
 671  671  
 672  672  /*
 673  673   * Build the base root inode
            *
            * The inode number is derived from the node's address: the address
            * is shifted right by 3, truncated to 32 bits, and offset by
            * SDEV_ROOTINO + 1.  dv is never dereferenced.
 674  674   */
 675  675  ino_t
 676  676  sdev_mkino(struct sdev_node *dv)
 677  677  {
 678  678          ino_t   ino;
 679  679  
 680  680          /*
 681  681           * for now, follow the lead of tmpfs here
 682  682           * need to someday understand the requirements here
 683  683           */
 684  684          ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
 685  685          ino += SDEV_ROOTINO + 1;
 686  686  
 687  687          return (ino);
 688  688  }
 689  689  
 690  690  int
 691  691  sdev_getlink(struct vnode *linkvp, char **link)
 692  692  {
 693  693          int err;
 694  694          char *buf;
 695  695          struct uio uio = {0};
 696  696          struct iovec iov = {0};
 697  697  
 698  698          if (linkvp == NULL)
 699  699                  return (ENOENT);
 700  700          ASSERT(linkvp->v_type == VLNK);
 701  701  
 702  702          buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 703  703          iov.iov_base = buf;
 704  704          iov.iov_len = MAXPATHLEN;
 705  705          uio.uio_iov = &iov;
 706  706          uio.uio_iovcnt = 1;
 707  707          uio.uio_resid = MAXPATHLEN;
 708  708          uio.uio_segflg = UIO_SYSSPACE;
 709  709          uio.uio_llimit = MAXOFFSET_T;
 710  710  
 711  711          err = VOP_READLINK(linkvp, &uio, kcred, NULL);
 712  712          if (err) {
 713  713                  cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
 714  714                  kmem_free(buf, MAXPATHLEN);
 715  715                  return (ENOENT);
 716  716          }
 717  717  
 718  718          /* mission complete */
 719  719          *link = i_ddi_strdup(buf, KM_SLEEP);
 720  720          kmem_free(buf, MAXPATHLEN);
 721  721          return (0);
 722  722  }
 723  723  
 724  724  /*
 725  725   * A convenient wrapper to get the devfs node vnode for a device
 726  726   * minor functionality: readlink() of a /dev symlink
 727  727   * Place the link into dv->sdev_symlink
 728  728   */
 729  729  static int
 730  730  sdev_follow_link(struct sdev_node *dv)
 731  731  {
 732  732          int err;
 733  733          struct vnode *linkvp;
 734  734          char *link = NULL;
 735  735  
 736  736          linkvp = SDEVTOV(dv);
 737  737          if (linkvp == NULL)
 738  738                  return (ENOENT);
 739  739          ASSERT(linkvp->v_type == VLNK);
 740  740          err = sdev_getlink(linkvp, &link);
 741  741          if (err) {
 742  742                  dv->sdev_symlink = NULL;
 743  743                  return (ENOENT);
 744  744          }
 745  745  
 746  746          ASSERT(link != NULL);
 747  747          dv->sdev_symlink = link;
 748  748          return (0);
 749  749  }
 750  750  
 751  751  static int
 752  752  sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
 753  753  {
 754  754          vtype_t otype = SDEVTOV(dv)->v_type;
 755  755  
 756  756          /*
 757  757           * existing sdev_node has a different type.
 758  758           */
 759  759          if (otype != nvap->va_type) {
 760  760                  sdcmn_err9(("sdev_node_check: existing node "
 761  761                      "  %s type %d does not match new node type %d\n",
 762  762                      dv->sdev_name, otype, nvap->va_type));
 763  763                  return (EEXIST);
 764  764          }
 765  765  
 766  766          /*
 767  767           * For a symlink, the target should be the same.
 768  768           */
 769  769          if (otype == VLNK) {
 770  770                  ASSERT(nargs != NULL);
 771  771                  ASSERT(dv->sdev_symlink != NULL);
 772  772                  if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
 773  773                          sdcmn_err9(("sdev_node_check: existing node "
 774  774                              " %s has different symlink %s as new node "
 775  775                              " %s\n", dv->sdev_name, dv->sdev_symlink,
 776  776                              (char *)nargs));
 777  777                          return (EEXIST);
 778  778                  }
 779  779          }
 780  780  
 781  781          return (0);
 782  782  }
 783  783  
 784  784  /*
 785  785   * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 786  786   *
 787  787   * arguments:
 788  788   *      - ddv (parent)
 789  789   *      - nm (child name)
 790  790   *      - newdv (sdev_node for nm is returned here)
 791  791   *      - vap (vattr for the node to be created, va_type should be set.
 792  792   *      - avp (attribute vnode)
 793  793   *        the defaults should be used if unknown)
 794  794   *      - cred
 795  795   *      - args
 796  796   *          . tnm (for VLNK)
 797  797   *          . global sdev_node (for !SDEV_GLOBAL)
 798  798   *      - state: SDEV_INIT, SDEV_READY
 799  799   *
 800  800   * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
 801  801   *
 802  802   * NOTE:  directory contents writers lock needs to be held before
 803  803   *        calling this routine.
 804  804   */
 805  805  int
 806  806  sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 807  807      struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
 808  808      sdev_node_state_t state)
 809  809  {
 810  810          int error = 0;
 811  811          sdev_node_state_t node_state;
 812  812          struct sdev_node *dv = NULL;
 813  813  
 814  814          ASSERT(state != SDEV_ZOMBIE);
 815  815          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
 816  816  
 817  817          if (*newdv) {
 818  818                  dv = *newdv;
 819  819          } else {
 820  820                  /* allocate and initialize a sdev_node */
 821  821                  if (ddv->sdev_state == SDEV_ZOMBIE) {
 822  822                          sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
 823  823                              ddv->sdev_path));
 824  824                          return (ENOENT);
 825  825                  }
 826  826  
 827  827                  error = sdev_nodeinit(ddv, nm, &dv, vap);
 828  828                  if (error != 0) {
 829  829                          sdcmn_err9(("sdev_mknode: error %d,"
 830  830                              " name %s can not be initialized\n",
 831  831                              error, nm));
 832  832                          return (error);
 833  833                  }
 834  834                  ASSERT(dv);
 835  835  
 836  836                  /* insert into the directory cache */
 837  837                  sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
 838  838          }
 839  839  
 840  840          ASSERT(dv);
 841  841          node_state = dv->sdev_state;
 842  842          ASSERT(node_state != SDEV_ZOMBIE);
 843  843  
 844  844          if (state == SDEV_READY) {
 845  845                  switch (node_state) {
 846  846                  case SDEV_INIT:
 847  847                          error = sdev_nodeready(dv, vap, avp, args, cred);
 848  848                          if (error) {
 849  849                                  sdcmn_err9(("sdev_mknode: node %s can NOT"
 850  850                                      " be transitioned into READY state, "
 851  851                                      "error %d\n", nm, error));
 852  852                          }
 853  853                          break;
 854  854                  case SDEV_READY:
 855  855                          /*
 856  856                           * Do some sanity checking to make sure
 857  857                           * the existing sdev_node is what has been
 858  858                           * asked for.
 859  859                           */
 860  860                          error = sdev_node_check(dv, vap, args);
 861  861                          break;
 862  862                  default:
 863  863                          break;
 864  864                  }
 865  865          }
 866  866  
 867  867          if (!error) {
 868  868                  *newdv = dv;
 869  869                  ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
 870  870          } else {
 871  871                  sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
 872  872                  /*
 873  873                   * We created this node, it wasn't passed into us. Therefore it
 874  874                   * is up to us to delete it.
 875  875                   */
 876  876                  if (*newdv == NULL)
 877  877                          SDEV_SIMPLE_RELE(dv);
 878  878                  *newdv = NULL;
 879  879          }
 880  880  
 881  881          return (error);
 882  882  }
 883  883  
 884  884  /*
 885  885   * convenient wrapper to change vp's ATIME, CTIME and MTIME
 886  886   */
 887  887  void
 888  888  sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
 889  889  {
 890  890          struct vattr attr;
 891  891          timestruc_t now;
 892  892          int err;
 893  893  
 894  894          ASSERT(vp);
 895  895          gethrestime(&now);
 896  896          if (mask & AT_CTIME)
 897  897                  attr.va_ctime = now;
 898  898          if (mask & AT_MTIME)
 899  899                  attr.va_mtime = now;
 900  900          if (mask & AT_ATIME)
 901  901                  attr.va_atime = now;
 902  902  
 903  903          attr.va_mask = (mask & AT_TIMES);
 904  904          err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
 905  905          if (err && (err != EROFS)) {
 906  906                  sdcmn_err(("update timestamps error %d\n", err));
 907  907          }
 908  908  }
 909  909  
 910  910  /*
 911  911   * the backing store vnode is released here
 912  912   */
 913  913  /*ARGSUSED1*/
 914  914  void
 915  915  sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
 916  916  {
 917  917          /* no references */
 918  918          ASSERT(dv->sdev_nlink == 0);
 919  919  
 920  920          if (dv->sdev_attrvp != NULLVP) {
 921  921                  VN_RELE(dv->sdev_attrvp);
 922  922                  /*
 923  923                   * reset the attrvp so that no more
 924  924                   * references can be made on this already
 925  925                   * vn_rele() vnode
 926  926                   */
 927  927                  dv->sdev_attrvp = NULLVP;
 928  928          }
 929  929  
 930  930          if (dv->sdev_attr != NULL) {
 931  931                  kmem_free(dv->sdev_attr, sizeof (struct vattr));
 932  932                  dv->sdev_attr = NULL;
 933  933          }
 934  934  
 935  935          if (dv->sdev_name != NULL) {
 936  936                  kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
 937  937                  dv->sdev_name = NULL;
 938  938          }
 939  939  
 940  940          if (dv->sdev_symlink != NULL) {
 941  941                  kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
 942  942                  dv->sdev_symlink = NULL;
 943  943          }
 944  944  
 945  945          if (dv->sdev_path) {
 946  946                  kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
 947  947                  dv->sdev_path = NULL;
 948  948          }
 949  949  
 950  950          if (!SDEV_IS_GLOBAL(dv))
 951  951                  sdev_prof_free(dv);
 952  952  
 953  953          if (SDEVTOV(dv)->v_type == VDIR) {
 954  954                  ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
  
    | 
      ↓ open down ↓ | 
    954 lines elided | 
    
      ↑ open up ↑ | 
  
 955  955                  avl_destroy(&dv->sdev_entries);
 956  956          }
 957  957  
 958  958          mutex_destroy(&dv->sdev_lookup_lock);
 959  959          cv_destroy(&dv->sdev_lookup_cv);
 960  960  
 961  961          /* return node to initial state as per constructor */
 962  962          (void) memset((void *)&dv->sdev_instance_data, 0,
 963  963              sizeof (dv->sdev_instance_data));
 964  964          vn_invalid(SDEVTOV(dv));
      965 +        dv->sdev_private = NULL;
 965  966          kmem_cache_free(sdev_node_cache, dv);
 966  967  }
 967  968  
 968  969  /*
 969  970   * DIRECTORY CACHE lookup
 970  971   */
 971  972  struct sdev_node *
 972  973  sdev_findbyname(struct sdev_node *ddv, char *nm)
 973  974  {
 974  975          struct sdev_node *dv;
 975  976          struct sdev_node dvtmp;
 976  977          avl_index_t     where;
 977  978  
 978  979          ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
 979  980  
 980  981          dvtmp.sdev_name = nm;
 981  982          dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
 982  983          if (dv) {
 983  984                  ASSERT(dv->sdev_dotdot == ddv);
 984  985                  ASSERT(strcmp(dv->sdev_name, nm) == 0);
 985  986                  ASSERT(dv->sdev_state != SDEV_ZOMBIE);
 986  987                  SDEV_HOLD(dv);
 987  988                  return (dv);
 988  989          }
 989  990          return (NULL);
 990  991  }
 991  992  
 992  993  /*
 993  994   * Inserts a new sdev_node in a parent directory
 994  995   */
 995  996  void
 996  997  sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
 997  998  {
 998  999          avl_index_t where;
 999 1000  
1000 1001          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1001 1002          ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1002 1003          ASSERT(ddv->sdev_nlink >= 2);
1003 1004          ASSERT(dv->sdev_nlink == 0);
1004 1005          ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1005 1006  
1006 1007          dv->sdev_dotdot = ddv;
1007 1008          VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1008 1009          avl_insert(&ddv->sdev_entries, dv, where);
1009 1010          ddv->sdev_nlink++;
1010 1011  }
1011 1012  
1012 1013  /*
1013 1014   * The following check is needed because while sdev_nodes are linked
1014 1015   * in SDEV_INIT state, they have their link counts incremented only
1015 1016   * in SDEV_READY state.
1016 1017   */
1017 1018  static void
1018 1019  decr_link(struct sdev_node *dv)
1019 1020  {
1020 1021          VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
1021 1022          if (dv->sdev_state != SDEV_INIT) {
1022 1023                  VERIFY(dv->sdev_nlink >= 1);
1023 1024                  dv->sdev_nlink--;
1024 1025          } else {
1025 1026                  VERIFY(dv->sdev_nlink == 0);
1026 1027          }
1027 1028  }
1028 1029  
1029 1030  /*
1030 1031   * Delete an existing dv from directory cache
1031 1032   *
1032 1033   * In the case of a node is still held by non-zero reference count, the node is
1033 1034   * put into ZOMBIE state. The node is always unlinked from its parent, but it is
1034 1035   * not destroyed via sdev_inactive until its reference count reaches "0".
1035 1036   */
1036 1037  static void
1037 1038  sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1038 1039  {
1039 1040          struct vnode *vp;
1040 1041          sdev_node_state_t os;
1041 1042  
1042 1043          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1043 1044  
1044 1045          vp = SDEVTOV(dv);
1045 1046          mutex_enter(&vp->v_lock);
1046 1047          rw_enter(&dv->sdev_contents, RW_WRITER);
1047 1048          os = dv->sdev_state;
1048 1049          ASSERT(os != SDEV_ZOMBIE);
1049 1050          dv->sdev_state = SDEV_ZOMBIE;
1050 1051  
1051 1052          /*
1052 1053           * unlink ourselves from the parent directory now to take care of the ..
1053 1054           * link. However, if we're a directory, we don't remove our reference to
1054 1055           * ourself eg. '.' until we are torn down in the inactive callback.
1055 1056           */
1056 1057          decr_link(ddv);
1057 1058          avl_remove(&ddv->sdev_entries, dv);
1058 1059          /*
1059 1060           * sdev_inactive expects nodes to have a link to themselves when we're
1060 1061           * tearing them down. If we're transitioning from the initial state to
1061 1062           * zombie and not via ready, then we're not going to have this link that
1062 1063           * comes from the node being ready. As a result, we need to increment
1063 1064           * our link count by one to account for this.
1064 1065           */
1065 1066          if (os == SDEV_INIT && dv->sdev_nlink == 0)
1066 1067                  dv->sdev_nlink++;
1067 1068          rw_exit(&dv->sdev_contents);
1068 1069          mutex_exit(&vp->v_lock);
1069 1070  }
1070 1071  
1071 1072  /*
1072 1073   * check if the source is in the path of the target
1073 1074   *
1074 1075   * source and target are different
1075 1076   */
1076 1077  /*ARGSUSED2*/
1077 1078  static int
1078 1079  sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1079 1080  {
1080 1081          int error = 0;
1081 1082          struct sdev_node *dotdot, *dir;
1082 1083  
1083 1084          dotdot = tdv->sdev_dotdot;
1084 1085          ASSERT(dotdot);
1085 1086  
1086 1087          /* fs root */
1087 1088          if (dotdot == tdv) {
1088 1089                  return (0);
1089 1090          }
1090 1091  
1091 1092          for (;;) {
1092 1093                  /*
1093 1094                   * avoid error cases like
1094 1095                   *      mv a a/b
1095 1096                   *      mv a a/b/c
1096 1097                   *      etc.
1097 1098                   */
1098 1099                  if (dotdot == sdv) {
1099 1100                          error = EINVAL;
1100 1101                          break;
1101 1102                  }
1102 1103  
1103 1104                  dir = dotdot;
1104 1105                  dotdot = dir->sdev_dotdot;
1105 1106  
1106 1107                  /* done checking because root is reached */
1107 1108                  if (dir == dotdot) {
1108 1109                          break;
1109 1110                  }
1110 1111          }
1111 1112          return (error);
1112 1113  }
1113 1114  
1114 1115  int
1115 1116  sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1116 1117      struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1117 1118      struct cred *cred)
1118 1119  {
1119 1120          int error = 0;
1120 1121          struct vnode *ovp = SDEVTOV(odv);
1121 1122          struct vnode *nvp;
1122 1123          struct vattr vattr;
1123 1124          int doingdir = (ovp->v_type == VDIR);
1124 1125          char *link = NULL;
1125 1126          int samedir = (oddv == nddv) ? 1 : 0;
1126 1127          int bkstore = 0;
1127 1128          struct sdev_node *idv = NULL;
1128 1129          struct sdev_node *ndv = NULL;
1129 1130          timestruc_t now;
1130 1131  
1131 1132          vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1132 1133          error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1133 1134          if (error)
1134 1135                  return (error);
1135 1136  
1136 1137          if (!samedir)
1137 1138                  rw_enter(&oddv->sdev_contents, RW_WRITER);
1138 1139          rw_enter(&nddv->sdev_contents, RW_WRITER);
1139 1140  
1140 1141          /*
1141 1142           * the source may have been deleted by another thread before
1142 1143           * we gets here.
1143 1144           */
1144 1145          if (odv->sdev_state != SDEV_READY) {
1145 1146                  error = ENOENT;
1146 1147                  goto err_out;
1147 1148          }
1148 1149  
1149 1150          if (doingdir && (odv == nddv)) {
1150 1151                  error = EINVAL;
1151 1152                  goto err_out;
1152 1153          }
1153 1154  
1154 1155          /*
1155 1156           * If renaming a directory, and the parents are different (".." must be
1156 1157           * changed) then the source dir must not be in the dir hierarchy above
1157 1158           * the target since it would orphan everything below the source dir.
1158 1159           */
1159 1160          if (doingdir && (oddv != nddv)) {
1160 1161                  error = sdev_checkpath(odv, nddv, cred);
1161 1162                  if (error)
1162 1163                          goto err_out;
1163 1164          }
1164 1165  
1165 1166          /* fix the source for a symlink */
1166 1167          if (vattr.va_type == VLNK) {
1167 1168                  if (odv->sdev_symlink == NULL) {
1168 1169                          error = sdev_follow_link(odv);
1169 1170                          if (error) {
1170 1171                                  /*
1171 1172                                   * The underlying symlink doesn't exist. This
1172 1173                                   * node probably shouldn't even exist. While
1173 1174                                   * it's a bit jarring to consumers, we're going
1174 1175                                   * to remove the node from /dev.
1175 1176                                   */
1176 1177                                  if (SDEV_IS_PERSIST((*ndvp)))
1177 1178                                          bkstore = 1;
1178 1179                                  sdev_dirdelete(oddv, odv);
1179 1180                                  if (bkstore) {
1180 1181                                          ASSERT(nddv->sdev_attrvp);
1181 1182                                          error = VOP_REMOVE(nddv->sdev_attrvp,
1182 1183                                              nnm, cred, NULL, 0);
1183 1184                                          if (error)
1184 1185                                                  goto err_out;
1185 1186                                  }
1186 1187                                  error = ENOENT;
1187 1188                                  goto err_out;
1188 1189                          }
1189 1190                  }
1190 1191                  ASSERT(odv->sdev_symlink);
1191 1192                  link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1192 1193          }
1193 1194  
1194 1195          /* destination existing */
1195 1196          if (*ndvp) {
1196 1197                  nvp = SDEVTOV(*ndvp);
1197 1198                  ASSERT(nvp);
1198 1199  
1199 1200                  /* handling renaming to itself */
1200 1201                  if (odv == *ndvp) {
1201 1202                          error = 0;
1202 1203                          goto err_out;
1203 1204                  }
1204 1205  
1205 1206                  if (nvp->v_type == VDIR) {
1206 1207                          if (!doingdir) {
1207 1208                                  error = EISDIR;
1208 1209                                  goto err_out;
1209 1210                          }
1210 1211  
1211 1212                          if (vn_vfswlock(nvp)) {
1212 1213                                  error = EBUSY;
1213 1214                                  goto err_out;
1214 1215                          }
1215 1216  
1216 1217                          if (vn_mountedvfs(nvp) != NULL) {
1217 1218                                  vn_vfsunlock(nvp);
1218 1219                                  error = EBUSY;
1219 1220                                  goto err_out;
1220 1221                          }
1221 1222  
1222 1223                          /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1223 1224                          if ((*ndvp)->sdev_nlink > 2) {
1224 1225                                  vn_vfsunlock(nvp);
1225 1226                                  error = EEXIST;
1226 1227                                  goto err_out;
1227 1228                          }
1228 1229                          vn_vfsunlock(nvp);
1229 1230  
1230 1231                          /*
1231 1232                           * We did not place the hold on *ndvp, so even though
1232 1233                           * we're deleting the node, we should not get rid of our
1233 1234                           * reference.
1234 1235                           */
1235 1236                          sdev_dirdelete(nddv, *ndvp);
1236 1237                          *ndvp = NULL;
1237 1238                          ASSERT(nddv->sdev_attrvp);
1238 1239                          error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1239 1240                              nddv->sdev_attrvp, cred, NULL, 0);
1240 1241                          if (error)
1241 1242                                  goto err_out;
1242 1243                  } else {
1243 1244                          if (doingdir) {
1244 1245                                  error = ENOTDIR;
1245 1246                                  goto err_out;
1246 1247                          }
1247 1248  
1248 1249                          if (SDEV_IS_PERSIST((*ndvp))) {
1249 1250                                  bkstore = 1;
1250 1251                          }
1251 1252  
1252 1253                          /*
1253 1254                           * Get rid of the node from the directory cache note.
1254 1255                           * Don't forget that it's not up to us to remove the vn
1255 1256                           * ref on the sdev node, as we did not place it.
1256 1257                           */
1257 1258                          sdev_dirdelete(nddv, *ndvp);
1258 1259                          *ndvp = NULL;
1259 1260                          if (bkstore) {
1260 1261                                  ASSERT(nddv->sdev_attrvp);
1261 1262                                  error = VOP_REMOVE(nddv->sdev_attrvp,
1262 1263                                      nnm, cred, NULL, 0);
1263 1264                                  if (error)
1264 1265                                          goto err_out;
1265 1266                          }
1266 1267                  }
1267 1268          }
1268 1269  
1269 1270          /*
1270 1271           * make a fresh node from the source attrs
1271 1272           */
1272 1273          ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1273 1274          error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1274 1275              NULL, (void *)link, cred, SDEV_READY);
1275 1276  
1276 1277          if (link != NULL) {
1277 1278                  kmem_free(link, strlen(link) + 1);
1278 1279                  link = NULL;
1279 1280          }
1280 1281  
1281 1282          if (error)
1282 1283                  goto err_out;
1283 1284          ASSERT(*ndvp);
1284 1285          ASSERT((*ndvp)->sdev_state == SDEV_READY);
1285 1286  
1286 1287          /* move dir contents */
1287 1288          if (doingdir) {
1288 1289                  for (idv = SDEV_FIRST_ENTRY(odv); idv;
1289 1290                      idv = SDEV_NEXT_ENTRY(odv, idv)) {
1290 1291                          SDEV_HOLD(idv);
1291 1292                          error = sdev_rnmnode(odv, idv,
1292 1293                              (struct sdev_node *)(*ndvp), &ndv,
1293 1294                              idv->sdev_name, cred);
1294 1295                          SDEV_RELE(idv);
1295 1296                          if (error)
1296 1297                                  goto err_out;
1297 1298                          ndv = NULL;
1298 1299                  }
1299 1300          }
1300 1301  
1301 1302          if ((*ndvp)->sdev_attrvp) {
1302 1303                  sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1303 1304                      AT_CTIME|AT_ATIME);
1304 1305          } else {
1305 1306                  ASSERT((*ndvp)->sdev_attr);
1306 1307                  gethrestime(&now);
1307 1308                  (*ndvp)->sdev_attr->va_ctime = now;
1308 1309                  (*ndvp)->sdev_attr->va_atime = now;
1309 1310          }
1310 1311  
1311 1312          if (nddv->sdev_attrvp) {
1312 1313                  sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1313 1314                      AT_MTIME|AT_ATIME);
1314 1315          } else {
1315 1316                  ASSERT(nddv->sdev_attr);
1316 1317                  gethrestime(&now);
1317 1318                  nddv->sdev_attr->va_mtime = now;
1318 1319                  nddv->sdev_attr->va_atime = now;
1319 1320          }
1320 1321          rw_exit(&nddv->sdev_contents);
1321 1322          if (!samedir)
1322 1323                  rw_exit(&oddv->sdev_contents);
1323 1324  
1324 1325          SDEV_RELE(*ndvp);
1325 1326          return (error);
1326 1327  
1327 1328  err_out:
1328 1329          if (link != NULL) {
1329 1330                  kmem_free(link, strlen(link) + 1);
1330 1331                  link = NULL;
1331 1332          }
1332 1333  
1333 1334          rw_exit(&nddv->sdev_contents);
1334 1335          if (!samedir)
1335 1336                  rw_exit(&oddv->sdev_contents);
1336 1337          return (error);
1337 1338  }
1338 1339  
1339 1340  /*
1340 1341   * Merge sdev_node specific information into an attribute structure.
1341 1342   *
1342 1343   * note: sdev_node is not locked here
1343 1344   */
1344 1345  void
1345 1346  sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1346 1347  {
1347 1348          struct vnode *vp = SDEVTOV(dv);
1348 1349  
1349 1350          vap->va_nlink = dv->sdev_nlink;
1350 1351          vap->va_nodeid = dv->sdev_ino;
1351 1352          vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1352 1353          vap->va_type = vp->v_type;
1353 1354  
1354 1355          if (vp->v_type == VDIR) {
1355 1356                  vap->va_rdev = 0;
1356 1357                  vap->va_fsid = vp->v_rdev;
1357 1358          } else if (vp->v_type == VLNK) {
1358 1359                  vap->va_rdev = 0;
1359 1360                  vap->va_mode  &= ~S_IFMT;
1360 1361                  vap->va_mode |= S_IFLNK;
1361 1362          } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1362 1363                  vap->va_rdev = vp->v_rdev;
1363 1364                  vap->va_mode &= ~S_IFMT;
1364 1365                  if (vap->va_type == VCHR)
1365 1366                          vap->va_mode |= S_IFCHR;
1366 1367                  else
1367 1368                          vap->va_mode |= S_IFBLK;
1368 1369          } else {
1369 1370                  vap->va_rdev = 0;
1370 1371          }
1371 1372  }
1372 1373  
1373 1374  struct vattr *
1374 1375  sdev_getdefault_attr(enum vtype type)
1375 1376  {
1376 1377          if (type == VDIR)
1377 1378                  return (&sdev_vattr_dir);
1378 1379          else if (type == VCHR)
1379 1380                  return (&sdev_vattr_chr);
1380 1381          else if (type == VBLK)
1381 1382                  return (&sdev_vattr_blk);
1382 1383          else if (type == VLNK)
1383 1384                  return (&sdev_vattr_lnk);
1384 1385          else
1385 1386                  return (NULL);
1386 1387  }
1387 1388  int
1388 1389  sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1389 1390  {
1390 1391          int rv = 0;
1391 1392          struct vnode *vp = SDEVTOV(dv);
1392 1393  
1393 1394          switch (vp->v_type) {
1394 1395          case VCHR:
1395 1396          case VBLK:
1396 1397                  /*
1397 1398                   * If vnode is a device, return special vnode instead
1398 1399                   * (though it knows all about -us- via sp->s_realvp)
1399 1400                   */
1400 1401                  *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1401 1402                  VN_RELE(vp);
1402 1403                  if (*vpp == NULLVP)
1403 1404                          rv = ENOSYS;
1404 1405                  break;
1405 1406          default:        /* most types are returned as is */
1406 1407                  *vpp = vp;
1407 1408                  break;
1408 1409          }
1409 1410          return (rv);
1410 1411  }
1411 1412  
1412 1413  /*
1413 1414   * junction between devname and root file system, e.g. ufs
1414 1415   */
1415 1416  int
1416 1417  devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1417 1418  {
1418 1419          struct vnode *rdvp = ddv->sdev_attrvp;
1419 1420          int rval = 0;
1420 1421  
1421 1422          ASSERT(rdvp);
1422 1423  
1423 1424          rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1424 1425              NULL);
1425 1426          return (rval);
1426 1427  }
1427 1428  
1428 1429  static int
1429 1430  sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1430 1431  {
1431 1432          struct sdev_node *dv = NULL;
1432 1433          char    *nm;
1433 1434          struct vnode *dirvp;
1434 1435          int     error;
1435 1436          vnode_t *vp;
1436 1437          int eof;
1437 1438          struct iovec iov;
1438 1439          struct uio uio;
1439 1440          struct dirent64 *dp;
1440 1441          dirent64_t *dbuf;
1441 1442          size_t dbuflen;
1442 1443          struct vattr vattr;
1443 1444          char *link = NULL;
1444 1445  
1445 1446          if (ddv->sdev_attrvp == NULL)
1446 1447                  return (0);
1447 1448          if (!(ddv->sdev_flags & SDEV_BUILD))
1448 1449                  return (0);
1449 1450  
1450 1451          dirvp = ddv->sdev_attrvp;
1451 1452          VN_HOLD(dirvp);
1452 1453          dbuf = kmem_zalloc(dlen, KM_SLEEP);
1453 1454  
1454 1455          uio.uio_iov = &iov;
1455 1456          uio.uio_iovcnt = 1;
1456 1457          uio.uio_segflg = UIO_SYSSPACE;
1457 1458          uio.uio_fmode = 0;
1458 1459          uio.uio_extflg = UIO_COPY_CACHED;
1459 1460          uio.uio_loffset = 0;
1460 1461          uio.uio_llimit = MAXOFFSET_T;
1461 1462  
1462 1463          eof = 0;
1463 1464          error = 0;
1464 1465          while (!error && !eof) {
1465 1466                  uio.uio_resid = dlen;
1466 1467                  iov.iov_base = (char *)dbuf;
1467 1468                  iov.iov_len = dlen;
1468 1469                  (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1469 1470                  error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1470 1471                  VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1471 1472  
1472 1473                  dbuflen = dlen - uio.uio_resid;
1473 1474                  if (error || dbuflen == 0)
1474 1475                          break;
1475 1476  
1476 1477                  if (!(ddv->sdev_flags & SDEV_BUILD))
1477 1478                          break;
1478 1479  
1479 1480                  for (dp = dbuf; ((intptr_t)dp <
1480 1481                      (intptr_t)dbuf + dbuflen);
1481 1482                      dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1482 1483                          nm = dp->d_name;
1483 1484  
1484 1485                          if (strcmp(nm, ".") == 0 ||
1485 1486                              strcmp(nm, "..") == 0)
1486 1487                                  continue;
1487 1488  
1488 1489                          vp = NULLVP;
1489 1490                          dv = sdev_cache_lookup(ddv, nm);
1490 1491                          if (dv) {
1491 1492                                  VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1492 1493                                  SDEV_SIMPLE_RELE(dv);
1493 1494                                  continue;
1494 1495                          }
1495 1496  
1496 1497                          /* refill the cache if not already */
1497 1498                          error = devname_backstore_lookup(ddv, nm, &vp);
1498 1499                          if (error)
1499 1500                                  continue;
1500 1501  
1501 1502                          vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1502 1503                          error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1503 1504                          if (error)
1504 1505                                  continue;
1505 1506  
1506 1507                          if (vattr.va_type == VLNK) {
1507 1508                                  error = sdev_getlink(vp, &link);
1508 1509                                  if (error) {
1509 1510                                          continue;
1510 1511                                  }
1511 1512                                  ASSERT(link != NULL);
1512 1513                          }
1513 1514  
1514 1515                          if (!rw_tryupgrade(&ddv->sdev_contents)) {
1515 1516                                  rw_exit(&ddv->sdev_contents);
1516 1517                                  rw_enter(&ddv->sdev_contents, RW_WRITER);
1517 1518                          }
1518 1519                          error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1519 1520                              cred, SDEV_READY);
1520 1521                          rw_downgrade(&ddv->sdev_contents);
1521 1522  
1522 1523                          if (link != NULL) {
1523 1524                                  kmem_free(link, strlen(link) + 1);
1524 1525                                  link = NULL;
1525 1526                          }
1526 1527  
1527 1528                          if (!error) {
1528 1529                                  ASSERT(dv);
1529 1530                                  ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1530 1531                                  SDEV_SIMPLE_RELE(dv);
1531 1532                          }
1532 1533                          vp = NULL;
1533 1534                          dv = NULL;
1534 1535                  }
1535 1536          }
1536 1537  
1537 1538  done:
1538 1539          VN_RELE(dirvp);
1539 1540          kmem_free(dbuf, dlen);
1540 1541  
1541 1542          return (error);
1542 1543  }
1543 1544  
1544 1545  void
1545 1546  sdev_filldir_dynamic(struct sdev_node *ddv)
1546 1547  {
1547 1548          int error;
1548 1549          int i;
1549 1550          struct vattr vattr;
1550 1551          struct vattr *vap = &vattr;
1551 1552          char *nm = NULL;
1552 1553          struct sdev_node *dv = NULL;
1553 1554  
1554 1555          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1555 1556          ASSERT((ddv->sdev_flags & SDEV_BUILD));
1556 1557  
1557 1558          *vap = *sdev_getdefault_attr(VDIR);     /* note structure copy here */
1558 1559          gethrestime(&vap->va_atime);
1559 1560          vap->va_mtime = vap->va_atime;
1560 1561          vap->va_ctime = vap->va_atime;
1561 1562          for (i = 0; vtab[i].vt_name != NULL; i++) {
1562 1563                  /*
1563 1564                   * This early, we may be in a read-only /dev environment: leave
1564 1565                   * the creation of any nodes we'd attempt to persist to
1565 1566                   * devfsadm. Because /dev itself is normally persistent, any
1566 1567                   * node which is not marked dynamic will end up being marked
1567 1568                   * persistent. However, some nodes are both dynamic and
1568 1569                   * persistent, mostly lofi and rlofi, so we need to be careful
1569 1570                   * in our check.
1570 1571                   */
1571 1572                  if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1572 1573                      !(vtab[i].vt_flags & SDEV_DYNAMIC))
1573 1574                          continue;
1574 1575                  nm = vtab[i].vt_name;
1575 1576                  ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1576 1577                  dv = NULL;
1577 1578                  error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1578 1579                      NULL, kcred, SDEV_READY);
1579 1580                  if (error) {
1580 1581                          cmn_err(CE_WARN, "%s/%s: error %d\n",
1581 1582                              ddv->sdev_name, nm, error);
1582 1583                  } else {
1583 1584                          ASSERT(dv);
1584 1585                          ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1585 1586                          SDEV_SIMPLE_RELE(dv);
1586 1587                  }
1587 1588          }
1588 1589  }
1589 1590  
1590 1591  /*
1591 1592   * Creating a backing store entry based on sdev_attr.
1592 1593   * This is called either as part of node creation in a persistent directory
1593 1594   * or from setattr/setsecattr to persist access attributes across reboot.
1594 1595   */
1595 1596  int
1596 1597  sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1597 1598  {
1598 1599          int error = 0;
1599 1600          struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1600 1601          struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1601 1602          struct vattr *vap = dv->sdev_attr;
1602 1603          char *nm = dv->sdev_name;
1603 1604          struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1604 1605  
1605 1606          ASSERT(dv && dv->sdev_name && rdvp);
1606 1607          ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1607 1608  
1608 1609  lookup:
1609 1610          /* try to find it in the backing store */
1610 1611          error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1611 1612              NULL);
1612 1613          if (error == 0) {
1613 1614                  if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1614 1615                          VN_HOLD(rrvp);
1615 1616                          VN_RELE(*rvp);
1616 1617                          *rvp = rrvp;
1617 1618                  }
1618 1619  
1619 1620                  kmem_free(dv->sdev_attr, sizeof (vattr_t));
1620 1621                  dv->sdev_attr = NULL;
1621 1622                  dv->sdev_attrvp = *rvp;
1622 1623                  return (0);
1623 1624          }
1624 1625  
1625 1626          /* let's try to persist the node */
1626 1627          gethrestime(&vap->va_atime);
1627 1628          vap->va_mtime = vap->va_atime;
1628 1629          vap->va_ctime = vap->va_atime;
1629 1630          vap->va_mask |= AT_TYPE|AT_MODE;
1630 1631          switch (vap->va_type) {
1631 1632          case VDIR:
1632 1633                  error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1633 1634                  sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1634 1635                      (void *)(*rvp), error));
1635 1636                  if (!error)
1636 1637                          VN_RELE(*rvp);
1637 1638                  break;
1638 1639          case VCHR:
1639 1640          case VBLK:
1640 1641          case VREG:
1641 1642          case VDOOR:
1642 1643                  error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1643 1644                      rvp, cred, 0, NULL, NULL);
1644 1645                  sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1645 1646                      (void *)(*rvp), error));
1646 1647                  if (!error)
1647 1648                          VN_RELE(*rvp);
1648 1649                  break;
1649 1650          case VLNK:
1650 1651                  ASSERT(dv->sdev_symlink);
1651 1652                  error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1652 1653                      NULL, 0);
1653 1654                  sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1654 1655                      error));
1655 1656                  break;
1656 1657          default:
1657 1658                  cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1658 1659                      "create\n", nm);
1659 1660                  /*NOTREACHED*/
1660 1661          }
1661 1662  
1662 1663          /* go back to lookup to factor out spec node and set attrvp */
1663 1664          if (error == 0)
1664 1665                  goto lookup;
1665 1666  
1666 1667          sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1667 1668          return (error);
1668 1669  }
1669 1670  
1670 1671  static void
1671 1672  sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1672 1673  {
1673 1674          struct sdev_node *dup = NULL;
1674 1675  
1675 1676          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1676 1677          if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1677 1678                  sdev_direnter(ddv, *dv);
1678 1679          } else {
1679 1680                  VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1680 1681                  SDEV_SIMPLE_RELE(*dv);
1681 1682                  sdev_nodedestroy(*dv, 0);
1682 1683                  *dv = dup;
1683 1684          }
1684 1685  }
1685 1686  
1686 1687  static void
1687 1688  sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1688 1689  {
1689 1690          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1690 1691          sdev_dirdelete(ddv, *dv);
1691 1692  }
1692 1693  
1693 1694  /*
1694 1695   * update the in-core directory cache
1695 1696   */
1696 1697  void
1697 1698  sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1698 1699      sdev_cache_ops_t ops)
1699 1700  {
1700 1701          ASSERT((SDEV_HELD(*dv)));
1701 1702  
1702 1703          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1703 1704          switch (ops) {
1704 1705          case SDEV_CACHE_ADD:
1705 1706                  sdev_cache_add(ddv, dv, nm);
1706 1707                  break;
1707 1708          case SDEV_CACHE_DELETE:
1708 1709                  sdev_cache_delete(ddv, dv);
1709 1710                  break;
1710 1711          default:
1711 1712                  break;
1712 1713          }
1713 1714  }
1714 1715  
1715 1716  /*
1716 1717   * retrieve the named entry from the directory cache
1717 1718   */
1718 1719  struct sdev_node *
1719 1720  sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1720 1721  {
1721 1722          struct sdev_node *dv = NULL;
1722 1723  
1723 1724          ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1724 1725          dv = sdev_findbyname(ddv, nm);
1725 1726  
1726 1727          return (dv);
1727 1728  }
1728 1729  
1729 1730  /*
1730 1731   * Implicit reconfig for nodes constructed by a link generator
1731 1732   * Start devfsadm if needed, or if devfsadm is in progress,
1732 1733   * prepare to block on devfsadm either completing or
1733 1734   * constructing the desired node.  As devfsadmd is global
1734 1735   * in scope, constructing all necessary nodes, we only
1735 1736   * need to initiate it once.
1736 1737   */
1737 1738  static int
1738 1739  sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1739 1740  {
1740 1741          int error = 0;
1741 1742  
1742 1743          if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1743 1744                  sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1744 1745                      ddv->sdev_name, nm, devfsadm_state));
1745 1746                  mutex_enter(&dv->sdev_lookup_lock);
1746 1747                  SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1747 1748                  mutex_exit(&dv->sdev_lookup_lock);
1748 1749                  error = 0;
1749 1750          } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1750 1751                  sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1751 1752                      ddv->sdev_name, nm, devfsadm_state));
1752 1753  
1753 1754                  sdev_devfsadmd_thread(ddv, dv, kcred);
1754 1755                  mutex_enter(&dv->sdev_lookup_lock);
1755 1756                  SDEV_BLOCK_OTHERS(dv,
1756 1757                      (SDEV_LOOKUP | SDEV_LGWAITING));
1757 1758                  mutex_exit(&dv->sdev_lookup_lock);
1758 1759                  error = 0;
1759 1760          } else {
1760 1761                  error = -1;
1761 1762          }
1762 1763  
1763 1764          return (error);
1764 1765  }
1765 1766  
1766 1767  /*
1767 1768   *  Support for specialized device naming construction mechanisms
1768 1769   */
1769 1770  static int
1770 1771  sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1771 1772      int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1772 1773      void *, char *), int flags, struct cred *cred)
1773 1774  {
1774 1775          int rv = 0;
1775 1776          char *physpath = NULL;
1776 1777          struct vattr vattr;
1777 1778          struct vattr *vap = &vattr;
1778 1779          struct sdev_node *dv = NULL;
1779 1780  
1780 1781          ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1781 1782          if (flags & SDEV_VLINK) {
1782 1783                  physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1783 1784                  rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1784 1785                      NULL);
1785 1786                  if (rv) {
1786 1787                          kmem_free(physpath, MAXPATHLEN);
1787 1788                          return (-1);
1788 1789                  }
1789 1790  
1790 1791                  *vap = *sdev_getdefault_attr(VLNK);     /* structure copy */
1791 1792                  vap->va_size = strlen(physpath);
1792 1793                  gethrestime(&vap->va_atime);
1793 1794                  vap->va_mtime = vap->va_atime;
1794 1795                  vap->va_ctime = vap->va_atime;
1795 1796  
1796 1797                  rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1797 1798                      (void *)physpath, cred, SDEV_READY);
1798 1799                  kmem_free(physpath, MAXPATHLEN);
1799 1800                  if (rv)
1800 1801                          return (rv);
1801 1802          } else if (flags & SDEV_VATTR) {
1802 1803                  /*
1803 1804                   * /dev/pts
1804 1805                   *
1805 1806                   * callback is responsible to set the basic attributes,
1806 1807                   * e.g. va_type/va_uid/va_gid/
1807 1808                   *    dev_t if VCHR or VBLK/
1808 1809                   */
1809 1810                  ASSERT(callback);
1810 1811                  rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1811 1812                  if (rv) {
1812 1813                          sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1813 1814                              "callback failed \n"));
1814 1815                          return (-1);
1815 1816                  }
1816 1817  
1817 1818                  rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1818 1819                      cred, SDEV_READY);
1819 1820  
1820 1821                  if (rv)
1821 1822                          return (rv);
1822 1823  
1823 1824          } else {
1824 1825                  impossible(("lookup: %s/%s by %s not supported (%d)\n",
1825 1826                      SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1826 1827                      __LINE__));
1827 1828                  rv = -1;
1828 1829          }
1829 1830  
1830 1831          *dvp = dv;
1831 1832          return (rv);
1832 1833  }
1833 1834  
/*
 * Report whether exec_name identifies devfsadm: returns 1 when the name is
 * exactly "devfsadm", 0 otherwise.
 *
 * Because devfsadmd -> /usr/sbin/devfsadm, matching on "devfsadm" captures
 * lookups from both devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
        return (strcmp(exec_name, "devfsadm") == 0);
}
1846 1847  
1847 1848  /*
1848 1849   * Lookup Order:
1849 1850   *      sdev_node cache;
1850 1851   *      backing store (SDEV_PERSIST);
1851 1852   *      DBNR: a. dir_ops implemented in the loadable modules;
1852 1853   *            b. vnode ops in vtab.
1853 1854   */
1854 1855  int
1855 1856  devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1856 1857      struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1857 1858      struct cred *, void *, char *), int flags)
1858 1859  {
1859 1860          int rv = 0, nmlen;
1860 1861          struct vnode *rvp = NULL;
1861 1862          struct sdev_node *dv = NULL;
1862 1863          int     retried = 0;
1863 1864          int     error = 0;
1864 1865          struct vattr vattr;
1865 1866          char *lookup_thread = curproc->p_user.u_comm;
1866 1867          int failed_flags = 0;
1867 1868          int (*vtor)(struct sdev_node *) = NULL;
1868 1869          int state;
1869 1870          int parent_state;
1870 1871          char *link = NULL;
1871 1872  
1872 1873          if (SDEVTOV(ddv)->v_type != VDIR)
1873 1874                  return (ENOTDIR);
1874 1875  
1875 1876          /*
1876 1877           * Empty name or ., return node itself.
1877 1878           */
1878 1879          nmlen = strlen(nm);
1879 1880          if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1880 1881                  *vpp = SDEVTOV(ddv);
1881 1882                  VN_HOLD(*vpp);
1882 1883                  return (0);
1883 1884          }
1884 1885  
1885 1886          /*
1886 1887           * .., return the parent directory
1887 1888           */
1888 1889          if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1889 1890                  *vpp = SDEVTOV(ddv->sdev_dotdot);
1890 1891                  VN_HOLD(*vpp);
1891 1892                  return (0);
1892 1893          }
1893 1894  
1894 1895          rw_enter(&ddv->sdev_contents, RW_READER);
1895 1896          if (ddv->sdev_flags & SDEV_VTOR) {
1896 1897                  vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1897 1898                  ASSERT(vtor);
1898 1899          }
1899 1900  
1900 1901  tryagain:
1901 1902          /*
1902 1903           * (a) directory cache lookup:
1903 1904           */
1904 1905          ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1905 1906          parent_state = ddv->sdev_state;
1906 1907          dv = sdev_cache_lookup(ddv, nm);
1907 1908          if (dv) {
1908 1909                  state = dv->sdev_state;
1909 1910                  switch (state) {
1910 1911                  case SDEV_INIT:
1911 1912                          if (is_devfsadm_thread(lookup_thread))
1912 1913                                  break;
1913 1914  
1914 1915                          /* ZOMBIED parent won't allow node creation */
1915 1916                          if (parent_state == SDEV_ZOMBIE) {
1916 1917                                  SD_TRACE_FAILED_LOOKUP(ddv, nm,
1917 1918                                      retried);
1918 1919                                  goto nolock_notfound;
1919 1920                          }
1920 1921  
1921 1922                          mutex_enter(&dv->sdev_lookup_lock);
1922 1923                          /* compensate the threads started after devfsadm */
1923 1924                          if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1924 1925                              !(SDEV_IS_LOOKUP(dv)))
1925 1926                                  SDEV_BLOCK_OTHERS(dv,
1926 1927                                      (SDEV_LOOKUP | SDEV_LGWAITING));
1927 1928  
1928 1929                          if (SDEV_IS_LOOKUP(dv)) {
1929 1930                                  failed_flags |= SLF_REBUILT;
1930 1931                                  rw_exit(&ddv->sdev_contents);
1931 1932                                  error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1932 1933                                  mutex_exit(&dv->sdev_lookup_lock);
1933 1934                                  rw_enter(&ddv->sdev_contents, RW_READER);
1934 1935  
1935 1936                                  if (error != 0) {
1936 1937                                          SD_TRACE_FAILED_LOOKUP(ddv, nm,
1937 1938                                              retried);
1938 1939                                          goto nolock_notfound;
1939 1940                                  }
1940 1941  
1941 1942                                  state = dv->sdev_state;
1942 1943                                  if (state == SDEV_INIT) {
1943 1944                                          SD_TRACE_FAILED_LOOKUP(ddv, nm,
1944 1945                                              retried);
1945 1946                                          goto nolock_notfound;
1946 1947                                  } else if (state == SDEV_READY) {
1947 1948                                          goto found;
1948 1949                                  } else if (state == SDEV_ZOMBIE) {
1949 1950                                          rw_exit(&ddv->sdev_contents);
1950 1951                                          SD_TRACE_FAILED_LOOKUP(ddv, nm,
1951 1952                                              retried);
1952 1953                                          SDEV_RELE(dv);
1953 1954                                          goto lookup_failed;
1954 1955                                  }
1955 1956                          } else {
1956 1957                                  mutex_exit(&dv->sdev_lookup_lock);
1957 1958                          }
1958 1959                          break;
1959 1960                  case SDEV_READY:
1960 1961                          goto found;
1961 1962                  case SDEV_ZOMBIE:
1962 1963                          rw_exit(&ddv->sdev_contents);
1963 1964                          SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1964 1965                          SDEV_RELE(dv);
1965 1966                          goto lookup_failed;
1966 1967                  default:
1967 1968                          rw_exit(&ddv->sdev_contents);
1968 1969                          SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1969 1970                          sdev_lookup_failed(ddv, nm, failed_flags);
1970 1971                          *vpp = NULLVP;
1971 1972                          return (ENOENT);
1972 1973                  }
1973 1974          }
1974 1975          ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1975 1976  
1976 1977          /*
1977 1978           * ZOMBIED parent does not allow new node creation.
1978 1979           * bail out early
1979 1980           */
1980 1981          if (parent_state == SDEV_ZOMBIE) {
1981 1982                  rw_exit(&ddv->sdev_contents);
1982 1983                  *vpp = NULLVP;
1983 1984                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1984 1985                  return (ENOENT);
1985 1986          }
1986 1987  
1987 1988          /*
1988 1989           * (b0): backing store lookup
1989 1990           *      SDEV_PERSIST is default except:
1990 1991           *              1) pts nodes
1991 1992           *              2) non-chmod'ed local nodes
1992 1993           *              3) zvol nodes
1993 1994           */
1994 1995          if (SDEV_IS_PERSIST(ddv)) {
1995 1996                  error = devname_backstore_lookup(ddv, nm, &rvp);
1996 1997  
1997 1998                  if (!error) {
1998 1999  
1999 2000                          vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
2000 2001                          error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2001 2002                          if (error) {
2002 2003                                  rw_exit(&ddv->sdev_contents);
2003 2004                                  if (dv)
2004 2005                                          SDEV_RELE(dv);
2005 2006                                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2006 2007                                  sdev_lookup_failed(ddv, nm, failed_flags);
2007 2008                                  *vpp = NULLVP;
2008 2009                                  return (ENOENT);
2009 2010                          }
2010 2011  
2011 2012                          if (vattr.va_type == VLNK) {
2012 2013                                  error = sdev_getlink(rvp, &link);
2013 2014                                  if (error) {
2014 2015                                          rw_exit(&ddv->sdev_contents);
2015 2016                                          if (dv)
2016 2017                                                  SDEV_RELE(dv);
2017 2018                                          SD_TRACE_FAILED_LOOKUP(ddv, nm,
2018 2019                                              retried);
2019 2020                                          sdev_lookup_failed(ddv, nm,
2020 2021                                              failed_flags);
2021 2022                                          *vpp = NULLVP;
2022 2023                                          return (ENOENT);
2023 2024                                  }
2024 2025                                  ASSERT(link != NULL);
2025 2026                          }
2026 2027  
2027 2028                          if (!rw_tryupgrade(&ddv->sdev_contents)) {
2028 2029                                  rw_exit(&ddv->sdev_contents);
2029 2030                                  rw_enter(&ddv->sdev_contents, RW_WRITER);
2030 2031                          }
2031 2032                          error = sdev_mknode(ddv, nm, &dv, &vattr,
2032 2033                              rvp, link, cred, SDEV_READY);
2033 2034                          rw_downgrade(&ddv->sdev_contents);
2034 2035  
2035 2036                          if (link != NULL) {
2036 2037                                  kmem_free(link, strlen(link) + 1);
2037 2038                                  link = NULL;
2038 2039                          }
2039 2040  
2040 2041                          if (error) {
2041 2042                                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2042 2043                                  rw_exit(&ddv->sdev_contents);
2043 2044                                  if (dv)
2044 2045                                          SDEV_RELE(dv);
2045 2046                                  goto lookup_failed;
2046 2047                          } else {
2047 2048                                  goto found;
2048 2049                          }
2049 2050                  } else if (retried) {
2050 2051                          rw_exit(&ddv->sdev_contents);
2051 2052                          sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2052 2053                              ddv->sdev_name, nm));
2053 2054                          if (dv)
2054 2055                                  SDEV_RELE(dv);
2055 2056                          SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2056 2057                          sdev_lookup_failed(ddv, nm, failed_flags);
2057 2058                          *vpp = NULLVP;
2058 2059                          return (ENOENT);
2059 2060                  }
2060 2061          }
2061 2062  
2062 2063  lookup_create_node:
2063 2064          /* first thread that is doing the lookup on this node */
2064 2065          if (callback) {
2065 2066                  ASSERT(dv == NULL);
2066 2067                  if (!rw_tryupgrade(&ddv->sdev_contents)) {
2067 2068                          rw_exit(&ddv->sdev_contents);
2068 2069                          rw_enter(&ddv->sdev_contents, RW_WRITER);
2069 2070                  }
2070 2071                  error = sdev_call_dircallback(ddv, &dv, nm, callback,
2071 2072                      flags, cred);
2072 2073                  rw_downgrade(&ddv->sdev_contents);
2073 2074                  if (error == 0) {
2074 2075                          goto found;
2075 2076                  } else {
2076 2077                          SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2077 2078                          rw_exit(&ddv->sdev_contents);
2078 2079                          goto lookup_failed;
2079 2080                  }
2080 2081          }
2081 2082          if (!dv) {
2082 2083                  if (!rw_tryupgrade(&ddv->sdev_contents)) {
2083 2084                          rw_exit(&ddv->sdev_contents);
2084 2085                          rw_enter(&ddv->sdev_contents, RW_WRITER);
2085 2086                  }
2086 2087                  error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2087 2088                      cred, SDEV_INIT);
2088 2089                  if (!dv) {
2089 2090                          rw_exit(&ddv->sdev_contents);
2090 2091                          SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2091 2092                          sdev_lookup_failed(ddv, nm, failed_flags);
2092 2093                          *vpp = NULLVP;
2093 2094                          return (ENOENT);
2094 2095                  }
2095 2096                  rw_downgrade(&ddv->sdev_contents);
2096 2097          }
2097 2098  
2098 2099          /*
2099 2100           * (b1) invoking devfsadm once per life time for devfsadm nodes
2100 2101           */
2101 2102          ASSERT(SDEV_HELD(dv));
2102 2103  
2103 2104          if (SDEV_IS_NO_NCACHE(dv))
2104 2105                  failed_flags |= SLF_NO_NCACHE;
2105 2106          if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2106 2107              SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2107 2108              ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2108 2109                  ASSERT(SDEV_HELD(dv));
2109 2110                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2110 2111                  goto nolock_notfound;
2111 2112          }
2112 2113  
2113 2114          /*
2114 2115           * filter out known non-existent devices recorded
2115 2116           * during initial reconfiguration boot for which
2116 2117           * reconfig should not be done and lookup may
2117 2118           * be short-circuited now.
2118 2119           */
2119 2120          if (sdev_lookup_filter(ddv, nm)) {
2120 2121                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2121 2122                  goto nolock_notfound;
2122 2123          }
2123 2124  
2124 2125          /* bypassing devfsadm internal nodes */
2125 2126          if (is_devfsadm_thread(lookup_thread)) {
2126 2127                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2127 2128                  goto nolock_notfound;
2128 2129          }
2129 2130  
2130 2131          if (sdev_reconfig_disable) {
2131 2132                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2132 2133                  goto nolock_notfound;
2133 2134          }
2134 2135  
2135 2136          error = sdev_call_devfsadmd(ddv, dv, nm);
2136 2137          if (error == 0) {
2137 2138                  sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2138 2139                      ddv->sdev_name, nm, curproc->p_user.u_comm));
2139 2140                  if (sdev_reconfig_verbose) {
2140 2141                          cmn_err(CE_CONT,
2141 2142                              "?lookup of %s/%s by %s: reconfig\n",
2142 2143                              ddv->sdev_name, nm, curproc->p_user.u_comm);
2143 2144                  }
2144 2145                  retried = 1;
2145 2146                  failed_flags |= SLF_REBUILT;
2146 2147                  ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2147 2148                  SDEV_SIMPLE_RELE(dv);
2148 2149                  goto tryagain;
2149 2150          } else {
2150 2151                  SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2151 2152                  goto nolock_notfound;
2152 2153          }
2153 2154  
2154 2155  found:
2155 2156          ASSERT(dv->sdev_state == SDEV_READY);
2156 2157          if (vtor) {
2157 2158                  /*
2158 2159                   * Check validity of returned node
2159 2160                   */
2160 2161                  switch (vtor(dv)) {
2161 2162                  case SDEV_VTOR_VALID:
2162 2163                          break;
2163 2164                  case SDEV_VTOR_STALE:
2164 2165                          /*
2165 2166                           * The name exists, but the cache entry is
2166 2167                           * stale and needs to be re-created.
2167 2168                           */
2168 2169                          ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2169 2170                          if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2170 2171                                  rw_exit(&ddv->sdev_contents);
2171 2172                                  rw_enter(&ddv->sdev_contents, RW_WRITER);
2172 2173                          }
2173 2174                          sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2174 2175                          rw_downgrade(&ddv->sdev_contents);
2175 2176                          SDEV_RELE(dv);
2176 2177                          dv = NULL;
2177 2178                          goto lookup_create_node;
2178 2179                          /* FALLTHRU */
2179 2180                  case SDEV_VTOR_INVALID:
2180 2181                          SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2181 2182                          sdcmn_err7(("lookup: destroy invalid "
2182 2183                              "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2183 2184                          goto nolock_notfound;
2184 2185                  case SDEV_VTOR_SKIP:
2185 2186                          sdcmn_err7(("lookup: node not applicable - "
2186 2187                              "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2187 2188                          rw_exit(&ddv->sdev_contents);
2188 2189                          SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2189 2190                          SDEV_RELE(dv);
2190 2191                          goto lookup_failed;
2191 2192                  default:
2192 2193                          cmn_err(CE_PANIC,
2193 2194                              "dev fs: validator failed: %s(%p)\n",
2194 2195                              dv->sdev_name, (void *)dv);
2195 2196                          break;
2196 2197                  }
2197 2198          }
2198 2199  
2199 2200          rw_exit(&ddv->sdev_contents);
2200 2201          rv = sdev_to_vp(dv, vpp);
2201 2202          sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2202 2203              "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2203 2204              dv->sdev_state, nm, rv));
2204 2205          return (rv);
2205 2206  
2206 2207  nolock_notfound:
2207 2208          /*
2208 2209           * Destroy the node that is created for synchronization purposes.
2209 2210           */
2210 2211          sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2211 2212              nm, dv->sdev_state));
2212 2213          ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2213 2214          if (dv->sdev_state == SDEV_INIT) {
2214 2215                  if (!rw_tryupgrade(&ddv->sdev_contents)) {
2215 2216                          rw_exit(&ddv->sdev_contents);
2216 2217                          rw_enter(&ddv->sdev_contents, RW_WRITER);
2217 2218                  }
2218 2219  
2219 2220                  /*
2220 2221                   * Node state may have changed during the lock
2221 2222                   * changes. Re-check.
2222 2223                   */
2223 2224                  if (dv->sdev_state == SDEV_INIT) {
2224 2225                          sdev_dirdelete(ddv, dv);
2225 2226                          rw_exit(&ddv->sdev_contents);
2226 2227                          sdev_lookup_failed(ddv, nm, failed_flags);
2227 2228                          SDEV_RELE(dv);
2228 2229                          *vpp = NULL;
2229 2230                          return (ENOENT);
2230 2231                  }
2231 2232          }
2232 2233  
2233 2234          rw_exit(&ddv->sdev_contents);
2234 2235          SDEV_RELE(dv);
2235 2236  
2236 2237  lookup_failed:
2237 2238          sdev_lookup_failed(ddv, nm, failed_flags);
2238 2239          *vpp = NULL;
2239 2240          return (ENOENT);
2240 2241  }
2241 2242  
2242 2243  /*
2243 2244   * Given a directory node, mark all nodes beneath as
2244 2245   * STALE, i.e. nodes that don't exist as far as new
2245 2246   * consumers are concerned.  Remove them from the
2246 2247   * list of directory entries so that no lookup or
2247 2248   * directory traversal will find them.  The node
2248 2249   * not deallocated so existing holds are not affected.
2249 2250   */
2250 2251  void
2251 2252  sdev_stale(struct sdev_node *ddv)
2252 2253  {
2253 2254          struct sdev_node *dv;
2254 2255          struct vnode *vp;
2255 2256  
2256 2257          ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2257 2258  
2258 2259          rw_enter(&ddv->sdev_contents, RW_WRITER);
2259 2260          while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2260 2261                  vp = SDEVTOV(dv);
2261 2262                  SDEV_HOLD(dv);
2262 2263                  if (vp->v_type == VDIR)
2263 2264                          sdev_stale(dv);
2264 2265  
2265 2266                  sdev_dirdelete(ddv, dv);
2266 2267                  SDEV_RELE(dv);
2267 2268          }
2268 2269          ddv->sdev_flags |= SDEV_BUILD;
2269 2270          rw_exit(&ddv->sdev_contents);
2270 2271  }
2271 2272  
2272 2273  /*
2273 2274   * Given a directory node, clean out all the nodes beneath.
2274 2275   * If expr is specified, clean node with names matching expr.
2275 2276   * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2276 2277   *      so they are excluded from future lookups.
2277 2278   */
2278 2279  int
2279 2280  sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2280 2281  {
2281 2282          int error = 0;
2282 2283          int busy = 0;
2283 2284          struct vnode *vp;
2284 2285          struct sdev_node *dv, *next;
2285 2286          int bkstore = 0;
2286 2287          int len = 0;
2287 2288          char *bks_name = NULL;
2288 2289  
2289 2290          ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2290 2291  
2291 2292          /*
2292 2293           * We try our best to destroy all unused sdev_node's
2293 2294           */
2294 2295          rw_enter(&ddv->sdev_contents, RW_WRITER);
2295 2296          for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) {
2296 2297                  next = SDEV_NEXT_ENTRY(ddv, dv);
2297 2298                  vp = SDEVTOV(dv);
2298 2299  
2299 2300                  if (expr && gmatch(dv->sdev_name, expr) == 0)
2300 2301                          continue;
2301 2302  
2302 2303                  if (vp->v_type == VDIR &&
2303 2304                      sdev_cleandir(dv, NULL, flags) != 0) {
2304 2305                          sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2305 2306                              dv->sdev_name));
2306 2307                          busy++;
2307 2308                          continue;
2308 2309                  }
2309 2310  
2310 2311                  if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2311 2312                          sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2312 2313                              dv->sdev_name));
2313 2314                          busy++;
2314 2315                          continue;
2315 2316                  }
2316 2317  
2317 2318                  /*
2318 2319                   * at this point, either dv is not held or SDEV_ENFORCE
2319 2320                   * is specified. In either case, dv needs to be deleted
2320 2321                   */
2321 2322                  SDEV_HOLD(dv);
2322 2323  
2323 2324                  bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2324 2325                  if (bkstore && (vp->v_type == VDIR))
2325 2326                          bkstore += 1;
2326 2327  
2327 2328                  if (bkstore) {
2328 2329                          len = strlen(dv->sdev_name) + 1;
2329 2330                          bks_name = kmem_alloc(len, KM_SLEEP);
2330 2331                          bcopy(dv->sdev_name, bks_name, len);
2331 2332                  }
2332 2333  
2333 2334                  sdev_dirdelete(ddv, dv);
2334 2335  
2335 2336                  /* take care the backing store clean up */
2336 2337                  if (bkstore) {
2337 2338                          ASSERT(bks_name);
2338 2339                          ASSERT(ddv->sdev_attrvp);
2339 2340  
2340 2341                          if (bkstore == 1) {
2341 2342                                  error = VOP_REMOVE(ddv->sdev_attrvp,
2342 2343                                      bks_name, kcred, NULL, 0);
2343 2344                          } else if (bkstore == 2) {
2344 2345                                  error = VOP_RMDIR(ddv->sdev_attrvp,
2345 2346                                      bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2346 2347                          }
2347 2348  
2348 2349                          /* do not propagate the backing store errors */
2349 2350                          if (error) {
2350 2351                                  sdcmn_err9(("sdev_cleandir: backing store"
2351 2352                                      "not cleaned\n"));
2352 2353                                  error = 0;
2353 2354                          }
2354 2355  
2355 2356                          bkstore = 0;
2356 2357                          kmem_free(bks_name, len);
2357 2358                          bks_name = NULL;
2358 2359                          len = 0;
2359 2360                  }
2360 2361  
2361 2362                  ddv->sdev_flags |= SDEV_BUILD;
2362 2363                  SDEV_RELE(dv);
2363 2364          }
2364 2365  
2365 2366          ddv->sdev_flags |= SDEV_BUILD;
2366 2367          rw_exit(&ddv->sdev_contents);
2367 2368  
2368 2369          if (busy) {
2369 2370                  error = EBUSY;
2370 2371          }
2371 2372  
2372 2373          return (error);
2373 2374  }
2374 2375  
2375 2376  /*
2376 2377   * a convenient wrapper for readdir() funcs
2377 2378   */
2378 2379  size_t
2379 2380  add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2380 2381  {
2381 2382          size_t reclen = DIRENT64_RECLEN(strlen(nm));
2382 2383          if (reclen > size)
2383 2384                  return (0);
2384 2385  
2385 2386          de->d_ino = (ino64_t)ino;
2386 2387          de->d_off = (off64_t)off + 1;
2387 2388          de->d_reclen = (ushort_t)reclen;
2388 2389          (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2389 2390          return (reclen);
2390 2391  }
2391 2392  
2392 2393  /*
2393 2394   * sdev_mount service routines
2394 2395   */
2395 2396  int
2396 2397  sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2397 2398  {
2398 2399          int     error;
2399 2400  
2400 2401          if (uap->datalen != sizeof (*args))
2401 2402                  return (EINVAL);
2402 2403  
2403 2404          if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2404 2405                  cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2405 2406                      "get user data. error %d\n", error);
2406 2407                  return (EFAULT);
2407 2408          }
2408 2409  
2409 2410          return (0);
2410 2411  }
2411 2412  
/*
 * nextdp(dp): address of the dirent64 record that follows dp in a packed
 * buffer, i.e. dp advanced by dp->d_reclen bytes.  Any earlier definition
 * of nextdp is discarded first.  The arithmetic is done on an integer so
 * that the result is converted to a record pointer in a single cast.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *) \
                            ((intptr_t)(dp) + (intptr_t)(dp)->d_reclen))
2417 2418  
/*
 * readdir helper func
 *
 * Produces dirent64 records for the directory vp into uiop.  The uio
 * offset is an entry index rather than a byte offset: index 0 is ".",
 * index 1 is "..", and the cached child nodes of the directory follow
 * from index 2 onwards.  Records are assembled in a temporary kernel
 * buffer the size of the caller's single iovec and moved out with
 * uiomove().
 *
 * The caller must hold ddv->sdev_contents as reader (asserted below).
 * When SDEV_IS_GLOBAL(ddv) is true the lock is dropped and re-acquired
 * as reader while waiting for a pending readdir-triggered reconfig.
 *
 * If the directory has a validator (SDEV_VTOR), entries it reports as
 * invalid or to be skipped are omitted; stale entries are still returned.
 *
 * Returns 0 on success, EINVAL if uiop does not have exactly one iovec
 * or the buffer cannot hold the "." / ".." records, ENOTDIR if vp is
 * not a directory, or the error from uiomove().  An error from
 * sdev_filldir_from_store() is returned only if no records were
 * produced (otherwise it is overwritten by the uiomove() result).
 * On success *eofp (if non-NULL) is set to 1 when the whole entry list
 * was walked and 0 when the buffer filled up first.
 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
        struct sdev_node *ddv = VTOSDEV(vp);
        struct sdev_node *dv;
        dirent64_t      *dp;
        ulong_t         outcount = 0;
        size_t          namelen;
        ulong_t         alloc_count;
        void            *outbuf;
        struct iovec    *iovp;
        int             error = 0;
        size_t          reclen;
        offset_t        diroff;
        offset_t        soff;
        int             this_reclen;
        int (*vtor)(struct sdev_node *) = NULL;
        struct vattr attr;
        timestruc_t now;

        ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
        ASSERT(RW_READ_HELD(&ddv->sdev_contents));

        /* offsets at or beyond MAXOFF_T are reported as end of directory */
        if (uiop->uio_loffset >= MAXOFF_T) {
                if (eofp)
                        *eofp = 1;
                return (0);
        }

        if (uiop->uio_iovcnt != 1)
                return (EINVAL);

        if (vp->v_type != VDIR)
                return (ENOTDIR);

        if (ddv->sdev_flags & SDEV_VTOR) {
                vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
                ASSERT(vtor);
        }

        if (eofp != NULL)
                *eofp = 0;

        /*
         * soff is the entry index to resume from; outbuf is a staging
         * buffer as large as the caller's iovec.  Nothing has been
         * allocated before this point, so the early returns above need
         * no cleanup; every exit below goes through "done".
         */
        soff = uiop->uio_loffset;
        iovp = uiop->uio_iov;
        alloc_count = iovp->iov_len;
        dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
        outcount = 0;

        /* a zombie directory is served from the cache only */
        if (ddv->sdev_state == SDEV_ZOMBIE)
                goto get_cache;

        if (SDEV_IS_GLOBAL(ddv)) {

                if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
                    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
                    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
                    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
                    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
                    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
                    !sdev_reconfig_disable) {
                        /*
                         * invoking "devfsadm" to do system device reconfig
                         */
                        mutex_enter(&ddv->sdev_lookup_lock);
                        SDEV_BLOCK_OTHERS(ddv,
                            (SDEV_READDIR|SDEV_LGWAITING));
                        mutex_exit(&ddv->sdev_lookup_lock);

                        sdcmn_err8(("readdir of %s by %s: reconfig\n",
                            ddv->sdev_path, curproc->p_user.u_comm));
                        if (sdev_reconfig_verbose) {
                                cmn_err(CE_CONT,
                                    "?readdir of %s by %s: reconfig\n",
                                    ddv->sdev_path, curproc->p_user.u_comm);
                        }

                        sdev_devfsadmd_thread(ddv, NULL, kcred);
                } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
                        /*
                         * compensate the "ls" started later than "devfsadm"
                         */
                        mutex_enter(&ddv->sdev_lookup_lock);
                        SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
                        mutex_exit(&ddv->sdev_lookup_lock);
                }

                /*
                 * release the contents lock so that
                 * the cache may be updated by devfsadmd
                 */
                rw_exit(&ddv->sdev_contents);
                mutex_enter(&ddv->sdev_lookup_lock);
                if (SDEV_IS_READDIR(ddv))
                        (void) sdev_wait4lookup(ddv, SDEV_READDIR);
                mutex_exit(&ddv->sdev_lookup_lock);
                rw_enter(&ddv->sdev_contents, RW_READER);

                sdcmn_err4(("readdir of directory %s by %s\n",
                    ddv->sdev_name, curproc->p_user.u_comm));
                /*
                 * SDEV_BUILD is cleared here whether or not the directory
                 * was refilled from the backing store.
                 */
                if (ddv->sdev_flags & SDEV_BUILD) {
                        if (SDEV_IS_PERSIST(ddv)) {
                                error = sdev_filldir_from_store(ddv,
                                    alloc_count, cred);
                        }
                        ddv->sdev_flags &= ~SDEV_BUILD;
                }
        }

get_cache:
        /* handle "." and ".." */
        diroff = 0;
        if (soff == 0) {
                /* first time */
                this_reclen = DIRENT64_RECLEN(1);
                if (alloc_count < this_reclen) {
                        error = EINVAL;
                        goto done;
                }

                dp->d_ino = (ino64_t)ddv->sdev_ino;
                dp->d_off = (off64_t)1;
                dp->d_reclen = (ushort_t)this_reclen;

                (void) strncpy(dp->d_name, ".",
                    DIRENT64_NAMELEN(this_reclen));
                outcount += dp->d_reclen;
                dp = nextdp(dp);
        }

        diroff++;
        if (soff <= 1) {
                this_reclen = DIRENT64_RECLEN(2);
                if (alloc_count < outcount + this_reclen) {
                        error = EINVAL;
                        goto done;
                }

                dp->d_reclen = (ushort_t)this_reclen;
                dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
                dp->d_off = (off64_t)2;

                (void) strncpy(dp->d_name, "..",
                    DIRENT64_NAMELEN(this_reclen));
                outcount += dp->d_reclen;

                dp = nextdp(dp);
        }


        /* gets the cache */
        diroff++;
        /*
         * diroff advances for every cached node, including those that are
         * skipped, so an entry's index is its position in the list + 2.
         */
        for (dv = SDEV_FIRST_ENTRY(ddv); dv;
            dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
                sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
                    diroff, soff, dv->sdev_name));

                /* bypassing pre-matured nodes */
                if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
                        sdcmn_err3(("sdev_readdir: pre-mature node  "
                            "%s %d\n", dv->sdev_name, dv->sdev_state));
                        continue;
                }

                /*
                 * Check validity of node
                 * Drop invalid and nodes to be skipped.
                 * A node the validator indicates as stale needs
                 * to be returned as presumably the node name itself
                 * is valid and the node data itself will be refreshed
                 * on lookup.  An application performing a readdir then
                 * stat on each entry should thus always see consistent
                 * data.  In any case, it is not possible to synchronize
                 * with dynamic kernel state, and any view we return can
                 * never be anything more than a snapshot at a point in time.
                 */
                if (vtor) {
                        switch (vtor(dv)) {
                        case SDEV_VTOR_VALID:
                                break;
                        case SDEV_VTOR_INVALID:
                        case SDEV_VTOR_SKIP:
                                continue;
                        case SDEV_VTOR_STALE:
                                sdcmn_err3(("sdev_readir: %s stale\n",
                                    dv->sdev_name));
                                break;
                        default:
                                cmn_err(CE_PANIC,
                                    "dev fs: validator failed: %s(%p)\n",
                                    dv->sdev_name, (void *)dv);
                                break;
                        /*NOTREACHED*/
                        }
                }

                namelen = strlen(dv->sdev_name);
                reclen = DIRENT64_RECLEN(namelen);
                /*
                 * Out of room: leave with dv still pointing at the entry
                 * that did not fit, so eof is reported as 0 and diroff is
                 * the index to resume from.
                 */
                if (outcount + reclen > alloc_count) {
                        goto full;
                }
                dp->d_reclen = (ushort_t)reclen;
                dp->d_ino = (ino64_t)dv->sdev_ino;
                dp->d_off = (off64_t)diroff + 1;
                (void) strncpy(dp->d_name, dv->sdev_name,
                    DIRENT64_NAMELEN(reclen));
                outcount += reclen;
                dp = nextdp(dp);
        }

full:
        sdcmn_err4(("sdev_readdir: moving %lu bytes: "
            "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
            (void *)dv));

        if (outcount)
                error = uiomove(outbuf, outcount, UIO_READ, uiop);

        /* dv is NULL only if the loop above ran off the end of the list */
        if (!error) {
                uiop->uio_loffset = diroff;
                if (eofp)
                        *eofp = dv ? 0 : 1;
        }


        /*
         * Stamp ctime and atime on the backing store vnode, if there is
         * one; the result of the setattr is deliberately ignored.
         */
        if (ddv->sdev_attrvp) {
                gethrestime(&now);
                attr.va_ctime = now;
                attr.va_atime = now;
                attr.va_mask = AT_CTIME|AT_ATIME;

                (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
        }
done:
        kmem_free(outbuf, alloc_count);
        return (error);
}
2659 2660  
2660 2661  static int
2661 2662  sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2662 2663  {
2663 2664          vnode_t *vp;
2664 2665          vnode_t *cvp;
2665 2666          struct sdev_node *svp;
2666 2667          char *nm;
2667 2668          struct pathname pn;
2668 2669          int error;
2669 2670          int persisted = 0;
2670 2671  
2671 2672          ASSERT(INGLOBALZONE(curproc));
2672 2673  
2673 2674          if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2674 2675                  return (error);
2675 2676          nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2676 2677  
2677 2678          vp = rootdir;
2678 2679          VN_HOLD(vp);
2679 2680  
2680 2681          while (pn_pathleft(&pn)) {
2681 2682                  ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2682 2683                  (void) pn_getcomponent(&pn, nm);
2683 2684  
2684 2685                  /*
2685 2686                   * Deal with the .. special case where we may be
2686 2687                   * traversing up across a mount point, to the
2687 2688                   * root of this filesystem or global root.
2688 2689                   */
2689 2690                  if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2690 2691  checkforroot:
2691 2692                          if (VN_CMP(vp, rootdir)) {
2692 2693                                  nm[1] = 0;
2693 2694                          } else if (vp->v_flag & VROOT) {
2694 2695                                  vfs_t *vfsp;
2695 2696                                  cvp = vp;
2696 2697                                  vfsp = cvp->v_vfsp;
2697 2698                                  vfs_rlock_wait(vfsp);
2698 2699                                  vp = cvp->v_vfsp->vfs_vnodecovered;
2699 2700                                  if (vp == NULL ||
2700 2701                                      (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2701 2702                                          vfs_unlock(vfsp);
2702 2703                                          VN_RELE(cvp);
2703 2704                                          error = EIO;
2704 2705                                          break;
2705 2706                                  }
2706 2707                                  VN_HOLD(vp);
2707 2708                                  vfs_unlock(vfsp);
2708 2709                                  VN_RELE(cvp);
2709 2710                                  cvp = NULL;
2710 2711                                  goto checkforroot;
2711 2712                          }
2712 2713                  }
2713 2714  
2714 2715                  error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2715 2716                      NULL, NULL);
2716 2717                  if (error) {
2717 2718                          VN_RELE(vp);
2718 2719                          break;
2719 2720                  }
2720 2721  
2721 2722                  /* traverse mount points encountered on our journey */
2722 2723                  if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2723 2724                          VN_RELE(vp);
2724 2725                          VN_RELE(cvp);
2725 2726                          break;
2726 2727                  }
2727 2728  
2728 2729                  /*
2729 2730                   * symbolic link, can be either relative and absolute
2730 2731                   */
2731 2732                  if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2732 2733                          struct pathname linkpath;
2733 2734                          pn_alloc(&linkpath);
2734 2735                          if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2735 2736                                  pn_free(&linkpath);
2736 2737                                  break;
2737 2738                          }
2738 2739                          if (pn_pathleft(&linkpath) == 0)
2739 2740                                  (void) pn_set(&linkpath, ".");
2740 2741                          error = pn_insert(&pn, &linkpath, strlen(nm));
2741 2742                          pn_free(&linkpath);
2742 2743                          if (pn.pn_pathlen == 0) {
2743 2744                                  VN_RELE(vp);
2744 2745                                  return (ENOENT);
2745 2746                          }
2746 2747                          if (pn.pn_path[0] == '/') {
2747 2748                                  pn_skipslash(&pn);
2748 2749                                  VN_RELE(vp);
2749 2750                                  VN_RELE(cvp);
2750 2751                                  vp = rootdir;
2751 2752                                  VN_HOLD(vp);
2752 2753                          } else {
2753 2754                                  VN_RELE(cvp);
2754 2755                          }
2755 2756                          continue;
2756 2757                  }
2757 2758  
2758 2759                  VN_RELE(vp);
2759 2760  
2760 2761                  /*
2761 2762                   * Direct the operation to the persisting filesystem
2762 2763                   * underlying /dev.  Bail if we encounter a
2763 2764                   * non-persistent dev entity here.
2764 2765                   */
2765 2766                  if (cvp->v_vfsp->vfs_fstype == devtype) {
2766 2767  
2767 2768                          if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2768 2769                                  error = ENOENT;
2769 2770                                  VN_RELE(cvp);
2770 2771                                  break;
2771 2772                          }
2772 2773  
2773 2774                          if (VTOSDEV(cvp) == NULL) {
2774 2775                                  error = ENOENT;
2775 2776                                  VN_RELE(cvp);
2776 2777                                  break;
2777 2778                          }
2778 2779                          svp = VTOSDEV(cvp);
2779 2780                          if ((vp = svp->sdev_attrvp) == NULL) {
2780 2781                                  error = ENOENT;
2781 2782                                  VN_RELE(cvp);
2782 2783                                  break;
2783 2784                          }
2784 2785                          persisted = 1;
2785 2786                          VN_HOLD(vp);
2786 2787                          VN_RELE(cvp);
2787 2788                          cvp = vp;
2788 2789                  }
2789 2790  
2790 2791                  vp = cvp;
2791 2792                  pn_skipslash(&pn);
2792 2793          }
2793 2794  
2794 2795          kmem_free(nm, MAXNAMELEN);
2795 2796          pn_free(&pn);
2796 2797  
2797 2798          if (error)
2798 2799                  return (error);
2799 2800  
2800 2801          /*
2801 2802           * Only return persisted nodes in the filesystem underlying /dev.
2802 2803           */
2803 2804          if (!persisted) {
2804 2805                  VN_RELE(vp);
2805 2806                  return (ENOENT);
2806 2807          }
2807 2808  
2808 2809          *r_vp = vp;
2809 2810          return (0);
2810 2811  }
2811 2812
           /*
            * Build a list of the entry names found in the directory named by dir.
            *
            * dir is resolved with sdev_modctl_lookup() and read with VOP_READDIR();
            * the "." and ".." entries are skipped.  Each remaining name is copied
            * into its own kmem allocation of strlen(name) + 1 bytes.  The pointer
            * array grows 64 slots at a time and is always allocated zeroed with
            * npaths_alloc + 1 slots, so the slot after the last name is NULL.
            *
            * On success returns 0 and stores the array in *dirlistp (NULL when no
            * entry was found), the number of names in *npathsp and the capacity in
            * *npathsp_alloc; sdev_modctl_readdir_free() takes these three values to
            * release the list.  If checking_empty is non-zero the scan stops after
            * the first name.
            *
            * On failure returns the error from sdev_modctl_lookup() or
            * VOP_READDIR().  The output arguments are not written, and any names
            * collected before the failure are freed here rather than leaked.
            */
2812 2813  int
2813 2814  sdev_modctl_readdir(const char *dir, char ***dirlistp,
2814 2815          int *npathsp, int *npathsp_alloc, int checking_empty)
2815 2816  {
2816 2817          char    **pathlist = NULL;
2817 2818          char    **newlist = NULL;
2818 2819          int     npaths = 0;
2819 2820          int     npaths_alloc = 0;
2820 2821          dirent64_t *dbuf = NULL;
2821 2822          int     n;
2822 2823          char    *s;
2823 2824          int error;
2824 2825          vnode_t *vp;
2825 2826          int eof;
2826 2827          struct iovec iov;
2827 2828          struct uio uio;
2828 2829          struct dirent64 *dp;
2829 2830          size_t dlen;
2830 2831          size_t dbuflen;
2831 2832          int ndirents = 64;
2832 2833          char *nm;
                   int i;
2833 2834
2834 2835          error = sdev_modctl_lookup(dir, &vp);
2835 2836          sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2836 2837              dir, curproc->p_user.u_comm,
2837 2838              (error == 0) ? "ok" : "failed"));
2838 2839          if (error)
2839 2840                  return (error);
2840 2841
                   /* scratch buffer sized for ndirents dirent64_t structures */
2841 2842          dlen = ndirents * (sizeof (*dbuf));
2842 2843          dbuf = kmem_alloc(dlen, KM_SLEEP);
2843 2844
2844 2845          uio.uio_iov = &iov;
2845 2846          uio.uio_iovcnt = 1;
2846 2847          uio.uio_segflg = UIO_SYSSPACE;
2847 2848          uio.uio_fmode = 0;
2848 2849          uio.uio_extflg = UIO_COPY_CACHED;
2849 2850          uio.uio_loffset = 0;
2850 2851          uio.uio_llimit = MAXOFFSET_T;
2851 2852
2852 2853          eof = 0;
2853 2854          error = 0;
2854 2855          while (!error && !eof) {
                           /* reset the transfer to cover the whole buffer */
2855 2856                  uio.uio_resid = dlen;
2856 2857                  iov.iov_base = (char *)dbuf;
2857 2858                  iov.iov_len = dlen;
2858 2859
2859 2860                  (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2860 2861                  error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2861 2862                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2862 2863
                           /* number of bytes of dirents actually returned */
2863 2864                  dbuflen = dlen - uio.uio_resid;
2864 2865
2865 2866                  if (error || dbuflen == 0)
2866 2867                          break;
2867 2868
                           /* walk the variable-length records via d_reclen */
2868 2869                  for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2869 2870                      dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2870 2871
2871 2872                          nm = dp->d_name;
2872 2873
2873 2874                          if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2874 2875                                  continue;
2875 2876                          if (npaths == npaths_alloc) {
2876 2877                                  npaths_alloc += 64;
2877 2878                                  newlist = (char **)
2878 2879                                      kmem_zalloc((npaths_alloc + 1) *
2879 2880                                      sizeof (char *), KM_SLEEP);
2880 2881                                  if (pathlist) {
2881 2882                                          bcopy(pathlist, newlist,
2882 2883                                              npaths * sizeof (char *));
                                                   /*
                                                    * npaths equals the old capacity
                                                    * here, so npaths + 1 is the old
                                                    * array's slot count.
                                                    */
2883 2884                                          kmem_free(pathlist,
2884 2885                                              (npaths + 1) * sizeof (char *));
2885 2886                                  }
2886 2887                                  pathlist = newlist;
2887 2888                          }
2888 2889                          n = strlen(nm) + 1;
2889 2890                          s = kmem_alloc(n, KM_SLEEP);
2890 2891                          bcopy(nm, s, n);
2891 2892                          pathlist[npaths++] = s;
2892 2893                          sdcmn_err11(("  %s/%s\n", dir, s));
2893 2894
2894 2895                          /* if checking empty, one entry is as good as many */
2895 2896                          if (checking_empty) {
2896 2897                                  eof = 1;
2897 2898                                  break;
2898 2899                          }
2899 2900                  }
2900 2901          }
2901 2902
2902 2903  exit:
2903 2904          VN_RELE(vp);
2904 2905
2905 2906          if (dbuf)
2906 2907                  kmem_free(dbuf, dlen);
2907 2908
2908 2909          if (error) {
                           /*
                            * Names gathered before the failure are not handed
                            * to the caller, so release them here.
                            */
                           if (pathlist != NULL) {
                                   for (i = 0; i < npaths; i++) {
                                           kmem_free(pathlist[i],
                                               strlen(pathlist[i]) + 1);
                                   }
                                   kmem_free(pathlist,
                                       (npaths_alloc + 1) * sizeof (char *));
                           }
2909 2910                  return (error);
                   }
2910 2911
2911 2912          *dirlistp = pathlist;
2912 2913          *npathsp = npaths;
2913 2914          *npathsp_alloc = npaths_alloc;
2914 2915
2915 2916          return (0);
2916 2917  }
2917 2918
           /*
            * Free a name list built by sdev_modctl_readdir().
            *
            * pathlist, npaths and npaths_alloc are the values that routine stored
            * through its dirlistp, npathsp and npathsp_alloc arguments.  Each name
            * is freed with its strlen() + 1 size and the array with
            * npaths_alloc + 1 slots, mirroring how they were allocated.
            *
            * sdev_modctl_readdir() reports a directory without entries as a NULL
            * list with npaths_alloc == 0; that case is accepted and is a no-op.
            */
2918 2919  void
2919 2920  sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
2920 2921  {
2921 2922          int     i, n;
2922 2923
                   /* nothing was allocated for an empty listing */
                   if (pathlist == NULL)
                           return;

2923 2924          for (i = 0; i < npaths; i++) {
2924 2925                  n = strlen(pathlist[i]) + 1;
2925 2926                  kmem_free(pathlist[i], n);
2926 2927          }
2927 2928
2928 2929          kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
2929 2930  }
2930 2931
           /*
            * Report whether path can be resolved by sdev_modctl_lookup().
            *
            * Returns 0 when the lookup succeeds, otherwise the lookup's error.
            * The vnode hold obtained by a successful lookup is dropped before
            * returning; the caller gets no vnode back.
            */
2931 2932  int
2932 2933  sdev_modctl_devexists(const char *path)
2933 2934  {
2934 2935          vnode_t *vp;
2935 2936          int error;
2936 2937
2937 2938          error = sdev_modctl_lookup(path, &vp);
                   /* sdcmn_err11 is presumably a debug-only message macro - confirm */
2938 2939          sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2939 2940              path, curproc->p_user.u_comm,
2940 2941              (error == 0) ? "ok" : "failed"));
                   /* only the existence matters, so release the hold right away */
2941 2942          if (error == 0)
2942 2943                  VN_RELE(vp);
2943 2944
2944 2945          return (error);
2945 2946  }
2946 2947  
2947 2948  extern int sdev_vnodeops_tbl_size;
2948 2949
2949 2950  /*
2950 2951   * construct a new template with overrides from vtab
            *
            * A copy of sdev_vnodeops_tbl (sdev_vnodeops_tbl_size bytes) is
            * allocated, and for every entry of tab, up to its NULL-name
            * terminator, the same-named entry in the copy gets tab's func.
            * A tab entry whose name is not found in the copy is reported with
            * a CE_NOTE message and otherwise ignored.
            *
            * Returns the new table; sdev_free_vtab() releases it.
2951 2952   */
2952 2953  static fs_operation_def_t *
2953 2954  sdev_merge_vtab(const fs_operation_def_t tab[])
2954 2955  {
2955 2956          fs_operation_def_t *new;
2956 2957          const fs_operation_def_t *tab_entry;
2957 2958
2958 2959          /* make a copy of standard vnode ops table */
2959 2960          new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2960 2961          bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2961 2962
2962 2963          /* replace the overrides from tab */
2963 2964          for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2964 2965                  fs_operation_def_t *std_entry = new;
                           /* linear search of the copy for a matching name */
2965 2966                  while (std_entry->name) {
2966 2967                          if (strcmp(tab_entry->name, std_entry->name) == 0) {
2967 2968                                  std_entry->func = tab_entry->func;
2968 2969                                  break;
2969 2970                          }
2970 2971                          std_entry++;
2971 2972                  }
                           /* reached the copy's terminator: no such operation */
2972 2973                  if (std_entry->name == NULL)
2973 2974                          cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2974 2975                              tab_entry->name);
2975 2976          }
2976 2977
2977 2978          return (new);
2978 2979  }
2979 2980  
2980 2981  /* free memory allocated by sdev_merge_vtab */
           /* the size passed is sdev_vnodeops_tbl_size, the amount it allocated */
2981 2982  static void
2982 2983  sdev_free_vtab(fs_operation_def_t *new)
2983 2984  {
2984 2985          kmem_free(new, sdev_vnodeops_tbl_size);
2985 2986  }
2986 2987  
2987 2988  /*
2988 2989   * a generic setattr() function
2989 2990   *
2990 2991   * note: protocol only supports AT_UID and AT_GID; callback is invoked
2991 2992   *       only for ownership changes.  Future enhancements can be done
            *       for other types, e.g. AT_MODE
            *
            * Rejects requests with AT_NOSET bits (EINVAL) and AT_SIZE on a
            * directory (EISDIR).  A node in SDEV_ZOMBIE state is left alone and
            * 0 is returned.  Otherwise one of three paths is taken:
            *   - the node has sdev_attrvp: forward to VOP_SETATTR() on it;
            *   - the node is SDEV_IS_PERSIST, or the request changes more than
            *     times and the node is not SDEV_IS_DYNAMIC: merge vap into the
            *     node, call sdev_shadow_node(), then VOP_SETATTR() on
            *     sdev_attrvp;
            *   - otherwise: after secpolicy_vnode_setattr() approves, update the
            *     in-memory dv->sdev_attr (mode, uid, gid, atime, mtime, ctime).
            *
            * The parent's sdev_contents lock is held as reader while the path
            * is chosen; dv's sdev_contents is taken as writer around
            * sdev_shadow_node() and around the in-memory update.
            *
            * Returns 0 or the error from the check, callback, sdev_shadow_node()
            * or VOP_SETATTR().
2992 2993   */
2993 2994  int
2994 2995  devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2995 2996      struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2996 2997      int), int protocol)
2997 2998  {
2998 2999          struct sdev_node        *dv = VTOSDEV(vp);
2999 3000          struct sdev_node        *parent = dv->sdev_dotdot;
3000 3001          struct vattr            *get;
3001 3002          uint_t                  mask = vap->va_mask;
3002 3003          int                     error;
3003 3004
3004 3005          /* some sanity checks */
3005 3006          if (vap->va_mask & AT_NOSET)
3006 3007                  return (EINVAL);
3007 3008
3008 3009          if (vap->va_mask & AT_SIZE) {
3009 3010                  if (vp->v_type == VDIR) {
3010 3011                          return (EISDIR);
3011 3012                  }
3012 3013          }
3013 3014
3014 3015          /* no need to set attribute, but do not fail either */
3015 3016          ASSERT(parent);
3016 3017          rw_enter(&parent->sdev_contents, RW_READER);
3017 3018          if (dv->sdev_state == SDEV_ZOMBIE) {
3018 3019                  rw_exit(&parent->sdev_contents);
3019 3020                  return (0);
3020 3021          }
3021 3022
3022 3023          /* If backing store exists, just set it. */
3023 3024          if (dv->sdev_attrvp) {
3024 3025                  rw_exit(&parent->sdev_contents);
3025 3026                  return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3026 3027          }
3027 3028
3028 3029          /*
3029 3030           * Otherwise, for nodes with the persistence attribute, create it.
                    * The same is done for a non-dynamic node when the request
                    * touches anything other than the AT_TIMES bits.
3030 3031           */
3031 3032          ASSERT(dv->sdev_attr);
3032 3033          if (SDEV_IS_PERSIST(dv) ||
3033 3034              ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3034 3035                  sdev_vattr_merge(dv, vap);
3035 3036                  rw_enter(&dv->sdev_contents, RW_WRITER);
3036 3037                  error = sdev_shadow_node(dv, cred);
3037 3038                  rw_exit(&dv->sdev_contents);
3038 3039                  rw_exit(&parent->sdev_contents);
3039 3040
3040 3041                  if (error)
3041 3042                          return (error);
                           /*
                            * NOTE(review): relies on sdev_shadow_node() having
                            * set dv->sdev_attrvp on success - confirm.
                            */
3042 3043                  return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3043 3044          }
3044 3045
3045 3046
3046 3047          /*
3047 3048           * sdev_attr was allocated in sdev_mknode
3048 3049           */
3049 3050          rw_enter(&dv->sdev_contents, RW_WRITER);
3050 3051          error = secpolicy_vnode_setattr(cred, vp, vap,
3051 3052              dv->sdev_attr, flags, sdev_unlocked_access, dv);
3052 3053          if (error) {
3053 3054                  rw_exit(&dv->sdev_contents);
3054 3055                  rw_exit(&parent->sdev_contents);
3055 3056                  return (error);
3056 3057          }
3057 3058
3058 3059          get = dv->sdev_attr;
3059 3060          if (mask & AT_MODE) {
                           /* keep the file-type bits, replace everything else */
3060 3061                  get->va_mode &= S_IFMT;
3061 3062                  get->va_mode |= vap->va_mode & ~S_IFMT;
3062 3063          }
3063 3064
3064 3065          if ((mask & AT_UID) || (mask & AT_GID)) {
3065 3066                  if (mask & AT_UID)
3066 3067                          get->va_uid = vap->va_uid;
3067 3068                  if (mask & AT_GID)
3068 3069                          get->va_gid = vap->va_gid;
3069 3070                  /*
3070 3071                   * a callback must be provided if the protocol is set
3071 3072                   */
3072 3073                  if ((protocol & AT_UID) || (protocol & AT_GID)) {
3073 3074                          ASSERT(callback);
3074 3075                          error = callback(dv, get, protocol);
3075 3076                          if (error) {
3076 3077                                  rw_exit(&dv->sdev_contents);
3077 3078                                  rw_exit(&parent->sdev_contents);
3078 3079                                  return (error);
3079 3080                          }
3080 3081                  }
3081 3082          }
3082 3083
3083 3084          if (mask & AT_ATIME)
3084 3085                  get->va_atime = vap->va_atime;
3085 3086          if (mask & AT_MTIME)
3086 3087                  get->va_mtime = vap->va_mtime;
                   /* ctime is stamped with the current time, not copied from vap */
3087 3088          if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3088 3089                  gethrestime(&get->va_ctime);
3089 3090          }
3090 3091
                   /* get is dv->sdev_attr itself at this point */
3091 3092          sdev_vattr_merge(dv, get);
3092 3093          rw_exit(&dv->sdev_contents);
3093 3094          rw_exit(&parent->sdev_contents);
3094 3095          return (0);
3095 3096  }
3096 3097  
3097 3098  /*
3098 3099   * a generic inactive() function
            *
            * Drops one reference from vp while holding vp->v_lock.  If this is
            * the last reference (v_count == 1) and callback is non-NULL,
            * callback(vp) is invoked first, still under v_lock.  When the last
            * reference goes away from a node in SDEV_ZOMBIE state, the node's
            * remaining links are dropped and it is passed to sdev_nodedestroy();
            * in every other case only v_count is decremented.
            *
            * cred is not used.
3099 3100   */
3100 3101  /*ARGSUSED*/
3101 3102  void
3102 3103  devname_inactive_func(struct vnode *vp, struct cred *cred,
3103 3104      void (*callback)(struct vnode *))
3104 3105  {
3105 3106          int clean;
3106 3107          struct sdev_node *dv = VTOSDEV(vp);
3107 3108          int state;
3108 3109
3109 3110          mutex_enter(&vp->v_lock);
3110 3111          ASSERT(vp->v_count >= 1);
3111 3112
3112 3113
3113 3114          if (vp->v_count == 1 && callback != NULL)
3114 3115                  callback(vp);
3115 3116
3116 3117          rw_enter(&dv->sdev_contents, RW_WRITER);
3117 3118          state = dv->sdev_state;
3118 3119
                   /* destroy only on the last reference to a zombie node */
3119 3120          clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3120 3121
3121 3122          /*
3122 3123           * sdev is a rather bad public citizen. It violates the general
3123 3124           * agreement that in memory nodes should always have a valid reference
3124 3125           * count on their vnode. But that's not the case here. This means that
3125 3126           * we do actually have to distinguish between getting inactive callbacks
3126 3127           * for zombies and otherwise. This should probably be fixed.
3127 3128           */
3128 3129          if (clean) {
3129 3130                  /* Remove the . entry to ourselves */
3130 3131                  if (vp->v_type == VDIR) {
3131 3132                          decr_link(dv);
3132 3133                  }
                           /* exactly one link must remain before the final drop */
3133 3134                  VERIFY(dv->sdev_nlink == 1);
3134 3135                  decr_link(dv);
3135 3136                  --vp->v_count;
                           /* both locks are released before the node is destroyed */
3136 3137                  rw_exit(&dv->sdev_contents);
3137 3138                  mutex_exit(&vp->v_lock);
3138 3139                  sdev_nodedestroy(dv, 0);
3139 3140          } else {
3140 3141                  --vp->v_count;
3141 3142                  rw_exit(&dv->sdev_contents);
3142 3143                  mutex_exit(&vp->v_lock);
3143 3144          }
3144 3145  }
  
    | 
      ↓ open down ↓ | 
    2170 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX