Print this page
    
OS-3969 lx brand: reading process map induces many DNLC scans
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/lookup.c
          +++ new/usr/src/uts/common/fs/lookup.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
       24 + * Copyright (c) 2015, Joyent, Inc. All rights reserved.
  24   25   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  25   26   */
  26   27  
  27   28  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  28   29  /*        All Rights Reserved   */
  29   30  
  30   31  /*
  31   32   * University Copyright- Copyright (c) 1982, 1986, 1988
  32   33   * The Regents of the University of California
  33   34   * All Rights Reserved
  34   35   *
  35   36   * University Acknowledgment- Portions of this document are derived from
  36   37   * software developed by the University of California, Berkeley, and its
  37   38   * contributors.
  38   39   */
  39   40  
  40   41  #include <sys/types.h>
  41   42  #include <sys/param.h>
  42   43  #include <sys/systm.h>
  43   44  #include <sys/cpuvar.h>
  44   45  #include <sys/errno.h>
  45   46  #include <sys/cred.h>
  46   47  #include <sys/user.h>
  47   48  #include <sys/uio.h>
  48   49  #include <sys/vfs.h>
  49   50  #include <sys/vnode.h>
  
    | 
      ↓ open down ↓ | 
    16 lines elided | 
    
      ↑ open up ↑ | 
  
  50   51  #include <sys/pathname.h>
  51   52  #include <sys/proc.h>
  52   53  #include <sys/vtrace.h>
  53   54  #include <sys/sysmacros.h>
  54   55  #include <sys/debug.h>
  55   56  #include <sys/dirent.h>
  56   57  #include <c2/audit.h>
  57   58  #include <sys/zone.h>
  58   59  #include <sys/dnlc.h>
  59   60  #include <sys/fs/snode.h>
       61 +#include <sys/brand.h>
  60   62  
  61   63  /* Controls whether paths are stored with vnodes. */
  62   64  int vfs_vnode_path = 1;
  63   65  
  64   66  int
  65   67  lookupname(
  66   68          char *fnamep,                   /* user pathname */
  67   69          enum uio_seg seg,               /* addr space that name is in */
  68   70          int followlink,                 /* follow sym links */
  69   71          vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
  70   72          vnode_t **compvpp)              /* ret for ptr to component vnode */
  71   73  {
                   /*
                    * Convenience form of lookupnameatcred(): no starting vnode is
                    * supplied (NULL) and the credential is taken from CRED().
                    */
  72   74          return (lookupnameatcred(fnamep, seg, followlink, dirvpp, compvpp, NULL,
  73   75              CRED()));
  74   76  }
  75   77  
  76   78  /*
  77   79   * Lookup the user file name,
  78   80   * Handle allocation and freeing of pathname buffer, return error.
            *
            * The name is first fetched into an on-stack buffer of TYPICALMAXPATHLEN
            * bytes.  If either that fetch or the lookup itself reports ENAMETOOLONG,
            * the whole operation is retried with a pathname obtained from pn_get(),
            * which is released with pn_free() once the second lookup has finished.
            * The value returned is the error from the last step that ran.
  79   81   */
  80   82  int
  81   83  lookupnameatcred(
  82   84          char *fnamep,                   /* user pathname */
  83   85          enum uio_seg seg,               /* addr space that name is in */
  84   86          int followlink,                 /* follow sym links */
  85   87          vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
  86   88          vnode_t **compvpp,              /* ret for ptr to component vnode */
  87   89          vnode_t *startvp,               /* start path search from vp */
  88   90          cred_t *cr)                     /* credential */
  89   91  {
  90   92          char namebuf[TYPICALMAXPATHLEN];
  91   93          struct pathname lookpn;
  92   94          int error;
  93   95  
                   /* First attempt: use the stack buffer; no pn_free() is done for it. */
  94   96          error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf));
  95   97          if (error == 0) {
  96   98                  error = lookuppnatcred(&lookpn, NULL, followlink,
  97   99                      dirvpp, compvpp, startvp, cr);
  98  100          }
  99  101          if (error == ENAMETOOLONG) {
 100  102                  /*
 101  103                   * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
 102  104                   */
 103  105                  if (error = pn_get(fnamep, seg, &lookpn))
 104  106                          return (error);
 105  107                  error = lookuppnatcred(&lookpn, NULL, followlink,
 106  108                      dirvpp, compvpp, startvp, cr);
 107  109                  pn_free(&lookpn);
 108  110          }
 109  111  
 110  112          return (error);
 111  113  }
 112  114  
 113  115  int
 114  116  lookupnameat(char *fnamep, enum uio_seg seg, int followlink,
 115  117      vnode_t **dirvpp, vnode_t **compvpp, vnode_t *startvp)
 116  118  {
                   /*
                    * Same as lookupnameatcred(), with the credential taken from
                    * CRED() instead of being supplied by the caller.
                    */
 117  119          return (lookupnameatcred(fnamep, seg, followlink, dirvpp, compvpp,
 118  120              startvp, CRED()));
 119  121  }
 120  122  
 121  123  int
 122  124  lookuppn(
 123  125          struct pathname *pnp,           /* pathname to lookup */
 124  126          struct pathname *rpnp,          /* if non-NULL, return resolved path */
 125  127          int followlink,                 /* (don't) follow sym links */
 126  128          vnode_t **dirvpp,               /* ptr for parent vnode */
 127  129          vnode_t **compvpp)              /* ptr for entry vnode */
 128  130  {
                   /*
                    * Convenience form of lookuppnatcred(): no starting vnode is
                    * supplied (NULL) and the credential is taken from CRED().
                    */
 129  131          return (lookuppnatcred(pnp, rpnp, followlink, dirvpp, compvpp, NULL,
 130  132              CRED()));
 131  133  }
 132  134  
 133  135  /*
 134  136   * Lookup the user file name from a given vp, using a specific credential.
            *
            * An empty pathname fails with ENOENT.  Otherwise the root and starting
            * vnodes are chosen under p_lock, leading slashes are stripped from pnp,
            * and the remaining work (including the result) is delegated to
            * lookuppnvp().
 135  137   */
 136  138  int
 137  139  lookuppnatcred(
 138  140          struct pathname *pnp,           /* pathname to lookup */
 139  141          struct pathname *rpnp,          /* if non-NULL, return resolved path */
 140  142          int followlink,                 /* (don't) follow sym links */
 141  143          vnode_t **dirvpp,               /* ptr for parent vnode */
 142  144          vnode_t **compvpp,              /* ptr for entry vnode */
 143  145          vnode_t *startvp,               /* start search from this vp */
 144  146          cred_t *cr)                     /* user credential */
 145  147  {
 146  148          vnode_t *vp;    /* current directory vp */
 147  149          vnode_t *rootvp;
 148  150          proc_t *p = curproc;
 149  151  
 150  152          if (pnp->pn_pathlen == 0)
 151  153                  return (ENOENT);
 152  154  
                   /*
                    * Use the process's own root directory (u_rdir) when one is set,
                    * falling back to the global rootdir.  Only a root other than
                    * rootdir is held here; lookuppnvp() expects exactly that.
                    */
 153  155          mutex_enter(&p->p_lock);        /* for u_rdir and u_cdir */
 154  156          if ((rootvp = PTOU(p)->u_rdir) == NULL)
 155  157                  rootvp = rootdir;
 156  158          else if (rootvp != rootdir)     /* no need to VN_HOLD rootdir */
 157  159                  VN_HOLD(rootvp);
 158  160  
                   /*
                    * Absolute paths start at rootvp.  Relative paths start at
                    * startvp or, when none was given, at the process's current
                    * directory (u_cdir).  The starting vnode is always held.
                    */
 159  161          if (pnp->pn_path[0] == '/') {
 160  162                  vp = rootvp;
 161  163          } else {
 162  164                  vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp;
 163  165          }
 164  166          VN_HOLD(vp);
 165  167          mutex_exit(&p->p_lock);
 166  168  
 167  169          /*
 168  170           * Skip over leading slashes
 169  171           */
 170  172          if (pnp->pn_path[0] == '/') {
 171  173                  do {
 172  174                          pnp->pn_path++;
 173  175                          pnp->pn_pathlen--;
 174  176                  } while (pnp->pn_path[0] == '/');
 175  177          }
 176  178  
                   /* The holds taken above on vp and rootvp are handed to lookuppnvp(). */
 177  179          return (lookuppnvp(pnp, rpnp, followlink, dirvpp,
 178  180              compvpp, rootvp, vp, cr));
 179  181  }
 180  182  
 181  183  int
 182  184  lookuppnat(struct pathname *pnp, struct pathname *rpnp,
 183  185      int followlink, vnode_t **dirvpp, vnode_t **compvpp,
 184  186      vnode_t *startvp)
 185  187  {
                   /*
                    * Same as lookuppnatcred(), with the credential taken from
                    * CRED() instead of being supplied by the caller.
                    */
 186  188          return (lookuppnatcred(pnp, rpnp, followlink, dirvpp, compvpp, startvp,
 187  189              CRED()));
 188  190  }
 189  191  
 190  192  /*
            * Private flag to do our getcwd() dirty work.  When LOOKUP_CHECKREAD is
            * set in the flags given to lookuppnvp(), read access (VREAD) is checked
            * on every directory that is searched.  LOOKUP_MASK is the complement of
            * that bit.
            */
 191  193  #define LOOKUP_CHECKREAD        0x10
 192  194  #define LOOKUP_MASK             (~LOOKUP_CHECKREAD)
 193  195  
 194  196  /*
 195  197   * Starting at current directory, translate pathname pnp to end.
 196  198   * Leave pathname of final component in pnp, return the vnode
 197  199   * for the final component in *compvpp, and return the vnode
 198  200   * for the parent of the final component in dirvpp.
 199  201   *
 200  202   * This is the central routine in pathname translation and handles
 201  203   * multiple components in pathnames, separating them at /'s.  It also
 202  204   * implements mounted file systems and processes symbolic links.
 203  205   *
 204  206   * vp is the vnode where the directory search should start.
 205  207   *
 206  208   * Reference counts: vp must be held prior to calling this function.  rootvp
 207  209   * should only be held if rootvp != rootdir.
 208  210   */
 209  211  int
 210  212  lookuppnvp(
 211  213          struct pathname *pnp,           /* pathname to lookup */
 212  214          struct pathname *rpnp,          /* if non-NULL, return resolved path */
 213  215          int flags,                      /* follow symlinks */
 214  216          vnode_t **dirvpp,               /* ptr for parent vnode */
 215  217          vnode_t **compvpp,              /* ptr for entry vnode */
 216  218          vnode_t *rootvp,                /* rootvp */
 217  219          vnode_t *vp,                    /* directory to start search at */
 218  220          cred_t *cr)                     /* user's credential */
 219  221  {
 220  222          vnode_t *cvp;   /* current component vp */
 221  223          char component[MAXNAMELEN];     /* buffer for component (incl null) */
 222  224          int error;
 223  225          int nlink;
 224  226          int lookup_flags;
 225  227          struct pathname presrvd; /* case preserved name */
 226  228          struct pathname *pp = NULL;
 227  229          vnode_t *startvp;
 228  230          vnode_t *zonevp = curproc->p_zone->zone_rootvp;         /* zone root */
 229  231          int must_be_directory = 0;
 230  232          boolean_t retry_with_kcred;
 231  233          uint32_t auditing = AU_AUDITING();
 232  234  
 233  235          CPU_STATS_ADDQ(CPU, sys, namei, 1);
 234  236          nlink = 0;
 235  237          cvp = NULL;
 236  238          if (rpnp)
 237  239                  rpnp->pn_pathlen = 0;
 238  240  
 239  241          lookup_flags = dirvpp ? LOOKUP_DIR : 0;
 240  242          if (flags & FIGNORECASE) {
 241  243                  lookup_flags |= FIGNORECASE;
 242  244                  pn_alloc(&presrvd);
 243  245                  pp = &presrvd;
 244  246          }
 245  247  
 246  248          if (auditing)
 247  249                  audit_anchorpath(pnp, vp == rootvp);
 248  250  
 249  251          /*
 250  252           * Eliminate any trailing slashes in the pathname.
 251  253           * If there are any, we must follow all symlinks.
 252  254           * Also, we must guarantee that the last component is a directory.
 253  255           */
 254  256          if (pn_fixslash(pnp)) {
 255  257                  flags |= FOLLOW;
 256  258                  must_be_directory = 1;
 257  259          }
 258  260  
 259  261          startvp = vp;
 260  262  next:
 261  263          retry_with_kcred = B_FALSE;
 262  264  
 263  265          /*
 264  266           * Make sure we have a directory.
 265  267           */
 266  268          if (vp->v_type != VDIR) {
 267  269                  error = ENOTDIR;
 268  270                  goto bad;
 269  271          }
 270  272  
 271  273          if (rpnp && VN_CMP(vp, rootvp))
 272  274                  (void) pn_set(rpnp, "/");
 273  275  
 274  276          /*
 275  277           * Process the next component of the pathname.
 276  278           */
 277  279          if (error = pn_getcomponent(pnp, component)) {
 278  280                  goto bad;
 279  281          }
 280  282  
 281  283          /*
 282  284           * Handle "..": two special cases.
 283  285           * 1. If we're at the root directory (e.g. after chroot or
 284  286           *    zone_enter) then change ".." to "." so we can't get
 285  287           *    out of this subtree.
 286  288           * 2. If this vnode is the root of a mounted file system,
 287  289           *    then replace it with the vnode that was mounted on
 288  290           *    so that we take the ".." in the other file system.
 289  291           */
 290  292          if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
 291  293  checkforroot:
 292  294                  if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) {
 293  295                          component[1] = '\0';
 294  296                  } else if (vp->v_flag & VROOT) {
 295  297                          vfs_t *vfsp;
 296  298                          cvp = vp;
 297  299  
 298  300                          /*
 299  301                           * While we deal with the vfs pointer from the vnode
 300  302                           * the filesystem could have been forcefully unmounted
 301  303                           * and the vnode's v_vfsp could have been invalidated
 302  304                           * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
 303  305                           * with vfs_rlock_wait/vfs_unlock.
 304  306                           * It is safe to use the v_vfsp even if it is freed by
 305  307                           * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
 306  308                           * do not dereference v_vfsp. It is just used as a
 307  309                           * magic cookie.
 308  310                           * One more corner case here is the memory getting
 309  311                           * reused for another vfs structure. In this case
 310  312                           * lookuppnvp's vfs_rlock_wait will succeed, domount's
 311  313                           * vfs_lock will fail and domount will bail out with an
 312  314                           * error (EBUSY).
 313  315                           */
 314  316                          vfsp = cvp->v_vfsp;
 315  317  
 316  318                          /*
 317  319                           * This lock is used to synchronize
 318  320                           * mounts/unmounts and lookups.
 319  321                           * Threads doing mounts/unmounts hold the
 320  322                           * writers version vfs_lock_wait().
 321  323                           */
 322  324  
 323  325                          vfs_rlock_wait(vfsp);
 324  326  
 325  327                          /*
 326  328                           * If this vnode is on a file system that
 327  329                           * has been forcibly unmounted, we can't
 328  330                           * proceed. Fail with EIO via the common
 329  331                           * exit, which also drops the rootvp hold.
 330  332                           *
 331  333                           * vfs_vnodecovered is NULL if unmounted.
 332  334                           * Currently, nfs uses VFS_UNMOUNTED to
 333  335                           * check if it's a forced-umount. Keep the
 334  336                           * same checking here as well even though it
 335  337                           * may not be needed.
 336  338                           */
 337  339                          if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
 338  340                              (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
 339  341                                  vfs_unlock(vfsp);
 340  342                                  vp = cvp;       /* restore the held vnode */
 341  343                                  cvp = NULL;     /* so it is released only once */
 342  344                                  error = EIO;
 343  345                                  goto bad_noaudit;
 344  346                          }
 345  347                          VN_HOLD(vp);
 346  348                          vfs_unlock(vfsp);
 347  349                          VN_RELE(cvp);
 348  350                          cvp = NULL;
 349  351                          /*
 350  352                           * Crossing mount points. For eg: We are doing
 351  353                           * a lookup of ".." for file systems root vnode
 352  354                           * mounted here, and VOP_LOOKUP() (with covered vnode)
 353  355                           * will be on underlying file systems mount point
 354  356                           * vnode. Set retry_with_kcred flag as we might end
 355  357                           * up doing VOP_LOOKUP() with kcred if required.
 356  358                           */
 357  359                          retry_with_kcred = B_TRUE;
 358  360                          goto checkforroot;
 359  361                  }
 360  362          }
 361  363  
 362  364          /*
 363  365           * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate
 364  366           * that we need to have read permission on every directory in the entire
 365  367           * path.  This is used to ensure that a forward-lookup of a cached value
 366  368           * has the same effect as a reverse-lookup when the cached value cannot
 367  369           * be found.
 368  370           */
 369  371          if ((flags & LOOKUP_CHECKREAD) &&
 370  372              (error = VOP_ACCESS(vp, VREAD, 0, cr, NULL)) != 0)
 371  373                  goto bad;
 372  374  
 373  375          /*
 374  376           * Perform a lookup in the current directory.
 375  377           */
 376  378          error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
 377  379              rootvp, cr, NULL, NULL, pp);
 378  380  
 379  381          /*
 380  382           * Retry with kcred - If crossing mount points & error is EACCES.
 381  383           *
 382  384           * If we are crossing mount points here and doing ".." lookup,
 383  385           * VOP_LOOKUP() might fail if the underlying file systems
 384  386           * mount point has no execute permission. In cases like these,
 385  387           * we retry VOP_LOOKUP() by giving as much privilege as possible
 386  388           * by passing kcred credentials.
 387  389           *
 388  390           * In case of hierarchical file systems, passing kcred still may
 389  391           * or may not work.
 390  392           * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
 391  393           *                      directory inside NFS FS.
 392  394           */
 393  395          if ((error == EACCES) && retry_with_kcred)
 394  396                  error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
 395  397                      rootvp, zone_kcred(), NULL, NULL, pp);
 396  398  
 397  399          if (error) {
 398  400                  cvp = NULL;
 399  401                  /*
 400  402                   * On error, return hard error if
 401  403                   * (a) we're not at the end of the pathname yet, or
 402  404                   * (b) the caller didn't want the parent directory, or
 403  405                   * (c) we failed for some reason other than a missing entry.
 404  406                   */
 405  407                  if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
 406  408                          goto bad;
 407  409                  if (auditing) { /* directory access */
 408  410                          if (error = audit_savepath(pnp, vp, vp, error, cr))
 409  411                                  goto bad_noaudit;
 410  412                  }
 411  413  
 412  414                  pn_setlast(pnp);
 413  415                  /*
 414  416                   * We inform the caller that the desired entry must be
 415  417                   * a directory by adding a '/' to the component name.
 416  418                   */
 417  419                  if (must_be_directory && (error = pn_addslash(pnp)) != 0)
 418  420                          goto bad;
 419  421                  *dirvpp = vp;
 420  422                  if (compvpp != NULL)
 421  423                          *compvpp = NULL;
 422  424                  if (rootvp != rootdir)
 423  425                          VN_RELE(rootvp);
 424  426                  if (pp)
 425  427                          pn_free(pp);
 426  428                  return (0);
 427  429          }
 428  430  
 429  431          /*
 430  432           * Traverse mount points.
 431  433           * XXX why don't we need to hold a read lock here (call vn_vfsrlock)?
 432  434           * What prevents a concurrent update to v_vfsmountedhere?
 433  435           *      Possible answer: if mounting, we might not see the mount
 434  436           *      if it is concurrently coming into existence, but that's
 435  437           *      really not much different from the thread running a bit slower.
 436  438           *      If unmounting, we may get into traverse() when we shouldn't,
 437  439           *      but traverse() will catch this case for us.
 438  440           *      (For this to work, fetching v_vfsmountedhere had better
 439  441           *      be atomic!)
 440  442           */
 441  443          if (vn_mountedvfs(cvp) != NULL) {
 442  444                  if ((error = traverse(&cvp)) != 0)
 443  445                          goto bad;
 444  446          }
 445  447  
 446  448          /*
 447  449           * If we hit a symbolic link and there is more path to be
 448  450           * translated or this operation does not wish to apply
 449  451           * to a link, then place the contents of the link at the
 450  452           * front of the remaining pathname.
 451  453           */
 452  454          if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
 453  455                  struct pathname linkpath;
 454  456  
 455  457                  if (++nlink > MAXSYMLINKS) {
 456  458                          error = ELOOP;
 457  459                          goto bad;
 458  460                  }
 459  461                  pn_alloc(&linkpath);
 460  462                  if (error = pn_getsymlink(cvp, &linkpath, cr)) {
 461  463                          pn_free(&linkpath);
 462  464                          goto bad;
 463  465                  }
 464  466  
 465  467                  if (auditing)
 466  468                          audit_symlink(pnp, &linkpath);
 467  469  
 468  470                  if (pn_pathleft(&linkpath) == 0)
 469  471                          (void) pn_set(&linkpath, ".");
 470  472                  error = pn_insert(pnp, &linkpath, strlen(component));
 471  473                  pn_free(&linkpath);
 472  474                  if (error)
 473  475                          goto bad;
 474  476                  VN_RELE(cvp);
 475  477                  cvp = NULL;
 476  478                  if (pnp->pn_pathlen == 0) {
 477  479                          error = ENOENT;
 478  480                          goto bad;
 479  481                  }
 480  482                  if (pnp->pn_path[0] == '/') {
 481  483                          do {
 482  484                                  pnp->pn_path++;
 483  485                                  pnp->pn_pathlen--;
 484  486                          } while (pnp->pn_path[0] == '/');
 485  487                          VN_RELE(vp);
 486  488                          vp = rootvp;
 487  489                          VN_HOLD(vp);
 488  490                  }
 489  491                  if (auditing)
 490  492                          audit_anchorpath(pnp, vp == rootvp);
 491  493                  if (pn_fixslash(pnp)) {
 492  494                          flags |= FOLLOW;
 493  495                          must_be_directory = 1;
 494  496                  }
 495  497                  goto next;
 496  498          }
 497  499  
 498  500          /*
 499  501           * If rpnp is non-NULL, remember the resolved path name therein.
 500  502           * Do not include "." components.  Collapse occurrences of
 501  503           * "previous/..", so long as "previous" is not itself "..".
 502  504           * Exhausting rpnp results in error ENAMETOOLONG.
 503  505           */
 504  506          if (rpnp && strcmp(component, ".") != 0) {
 505  507                  size_t len;
 506  508  
 507  509                  if (strcmp(component, "..") == 0 &&
 508  510                      rpnp->pn_pathlen != 0 &&
 509  511                      !((rpnp->pn_pathlen > 2 &&
 510  512                      strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
 511  513                      (rpnp->pn_pathlen == 2 &&
 512  514                      strncmp(rpnp->pn_path, "..", 2) == 0))) {
 513  515                          while (rpnp->pn_pathlen &&
 514  516                              rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
 515  517                                  rpnp->pn_pathlen--;
 516  518                          if (rpnp->pn_pathlen > 1)
 517  519                                  rpnp->pn_pathlen--;
 518  520                          rpnp->pn_path[rpnp->pn_pathlen] = '\0';
 519  521                  } else {
 520  522                          if (rpnp->pn_pathlen != 0 &&
 521  523                              rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
 522  524                                  rpnp->pn_path[rpnp->pn_pathlen++] = '/';
 523  525                          if (flags & FIGNORECASE) {
 524  526                                  /*
 525  527                                   * Return the case-preserved name
 526  528                                   * within the resolved path.
 527  529                                   */
 528  530                                  error = copystr(pp->pn_buf,
 529  531                                      rpnp->pn_path + rpnp->pn_pathlen,
 530  532                                      rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
 531  533                          } else {
 532  534                                  error = copystr(component,
 533  535                                      rpnp->pn_path + rpnp->pn_pathlen,
 534  536                                      rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
 535  537                          }
 536  538                          if (error)      /* copystr() returns ENAMETOOLONG */
 537  539                                  goto bad;
 538  540                          rpnp->pn_pathlen += (len - 1);
 539  541                          ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
 540  542                  }
 541  543          }
 542  544  
 543  545          /*
 544  546           * If no more components, return last directory (if wanted) and
 545  547           * last component (if wanted).
 546  548           */
 547  549          if (pn_pathleft(pnp) == 0) {
 548  550                  /*
 549  551                   * If there was a trailing slash in the pathname,
 550  552                   * make sure the last component is a directory.
 551  553                   */
 552  554                  if (must_be_directory && cvp->v_type != VDIR) {
 553  555                          error = ENOTDIR;
 554  556                          goto bad;
 555  557                  }
 556  558                  if (dirvpp != NULL) {
 557  559                          /*
 558  560                           * Check that we have the real parent and not
 559  561                           * an alias of the last component.
 560  562                           */
 561  563                          if (vn_compare(vp, cvp)) {
 562  564                                  if (auditing)
 563  565                                          (void) audit_savepath(pnp, cvp, vp,
 564  566                                              EINVAL, cr);
 565  567                                  pn_setlast(pnp);
 566  568                                  VN_RELE(vp);
 567  569                                  VN_RELE(cvp);
 568  570                                  if (rootvp != rootdir)
 569  571                                          VN_RELE(rootvp);
 570  572                                  if (pp)
 571  573                                          pn_free(pp);
 572  574                                  return (EINVAL);
 573  575                          }
 574  576                          *dirvpp = vp;
 575  577                  } else
 576  578                          VN_RELE(vp);
 577  579                  if (auditing)
 578  580                          (void) audit_savepath(pnp, cvp, vp, 0, cr);
 579  581                  if (pnp->pn_path == pnp->pn_buf)
 580  582                          (void) pn_set(pnp, ".");
 581  583                  else
 582  584                          pn_setlast(pnp);
 583  585                  if (rpnp) {
 584  586                          if (VN_CMP(cvp, rootvp))
 585  587                                  (void) pn_set(rpnp, "/");
 586  588                          else if (rpnp->pn_pathlen == 0)
 587  589                                  (void) pn_set(rpnp, ".");
 588  590                  }
 589  591  
 590  592                  if (compvpp != NULL)
 591  593                          *compvpp = cvp;
 592  594                  else
 593  595                          VN_RELE(cvp);
 594  596                  if (rootvp != rootdir)
 595  597                          VN_RELE(rootvp);
 596  598                  if (pp)
 597  599                          pn_free(pp);
 598  600                  return (0);
 599  601          }
 600  602  
 601  603          /*
 602  604           * Skip over slashes from end of last component.
 603  605           */
 604  606          while (pnp->pn_path[0] == '/') {
 605  607                  pnp->pn_path++;
 606  608                  pnp->pn_pathlen--;
 607  609          }
 608  610  
 609  611          /*
 610  612           * Searched through another level of directory:
 611  613           * release previous directory handle and save new (result
 612  614           * of lookup) as current directory.
 613  615           */
 614  616          VN_RELE(vp);
 615  617          vp = cvp;
 616  618          cvp = NULL;
 617  619          goto next;
 618  620  
 619  621  bad:
 620  622          if (auditing)   /* reached end of path */
 621  623                  (void) audit_savepath(pnp, cvp, vp, error, cr);
 622  624  bad_noaudit:
 623  625          /*
 624  626           * Error.  Release vnodes and return.
 625  627           */
 626  628          if (cvp)
 627  629                  VN_RELE(cvp);
 628  630          /*
 629  631           * If the error was ESTALE and the current directory to look in
 630  632           * was the root for this lookup, the root for a mounted file
 631  633           * system, or the starting directory for lookups, then
 632  634           * return ENOENT instead of ESTALE.  In this case, no recovery
 633  635           * is possible by the higher level.  If ESTALE was returned for
 634  636           * some intermediate directory along the path, then recovery
 635  637           * is potentially possible and retrying from the higher level
 636  638           * will either correct the situation by purging stale cache
 637  639           * entries or eventually get back to the point where no recovery
 638  640           * is possible.
 639  641           */
 640  642          if (error == ESTALE &&
 641  643              (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
 642  644                  error = ENOENT;
 643  645          VN_RELE(vp);
 644  646          if (rootvp != rootdir)
 645  647                  VN_RELE(rootvp);
 646  648          if (pp)
 647  649                  pn_free(pp);
 648  650          return (error);
 649  651  }
 650  652  
 651  653  /*
 652  654   * Traverse a mount point.  Routine accepts a vnode pointer as a reference
 653  655   * parameter and performs the indirection, releasing the original vnode.
 654  656   */
 655  657  int
 656  658  traverse(vnode_t **cvpp)
 657  659  {
 658  660          int error = 0;
 659  661          vnode_t *cvp;
 660  662          vnode_t *tvp;
 661  663          vfs_t *vfsp;
 662  664  
 663  665          cvp = *cvpp;
 664  666  
 665  667          /*
 666  668           * If this vnode is mounted on, then we transparently indirect
 667  669           * to the vnode which is the root of the mounted file system.
 668  670           * Before we do this we must check that an unmount is not in
 669  671           * progress on this vnode.
 670  672           */
 671  673  
 672  674          for (;;) {
 673  675                  /*
 674  676                   * Try to read lock the vnode.  If this fails because
 675  677                   * the vnode is already write locked, then check to
 676  678                   * see whether it is the current thread which locked
 677  679                   * the vnode.  If it is not, then read lock the vnode
 678  680                   * by waiting to acquire the lock.
 679  681                   *
 680  682                   * The code path in domount() is an example of support
 681  683                   * which needs to look up two pathnames and locks one
 682  684                   * of them in between the two lookups.
 683  685                   */
 684  686                  error = vn_vfsrlock(cvp);
 685  687                  if (error) {
 686  688                          if (!vn_vfswlock_held(cvp))
 687  689                                  error = vn_vfsrlock_wait(cvp);
 688  690                          if (error != 0) {
 689  691                                  /*
 690  692                                   * lookuppn() expects a held vnode to be
 691  693                                   * returned because it promptly calls
 692  694                                   * VN_RELE after the error return
 693  695                                   */
 694  696                                  *cvpp = cvp;
 695  697                                  return (error);
 696  698                          }
 697  699                  }
 698  700  
 699  701                  /*
 700  702                   * Reached the end of the mount chain?
 701  703                   */
 702  704                  vfsp = vn_mountedvfs(cvp);
 703  705                  if (vfsp == NULL) {
 704  706                          vn_vfsunlock(cvp);
 705  707                          break;
 706  708                  }
 707  709  
 708  710                  /*
 709  711                   * The read lock must be held across the call to VFS_ROOT() to
 710  712                   * prevent a concurrent unmount from destroying the vfs.
 711  713                   */
 712  714                  error = VFS_ROOT(vfsp, &tvp);
 713  715                  vn_vfsunlock(cvp);
 714  716  
 715  717                  if (error)
 716  718                          break;
 717  719  
 718  720                  VN_RELE(cvp);
 719  721  
 720  722                  cvp = tvp;
 721  723          }
 722  724  
 723  725          *cvpp = cvp;
 724  726          return (error);
 725  727  }
 726  728  
 727  729  /*
 728  730   * Return the lowermost vnode if this is a mountpoint.
 729  731   */
 730  732  static vnode_t *
 731  733  vn_under(vnode_t *vp)
 732  734  {
 733  735          vnode_t *uvp;
 734  736          vfs_t *vfsp;
 735  737  
 736  738          while (vp->v_flag & VROOT) {
 737  739  
 738  740                  vfsp = vp->v_vfsp;
 739  741                  vfs_rlock_wait(vfsp);
 740  742                  if ((uvp = vfsp->vfs_vnodecovered) == NULL ||
 741  743                      (vfsp->vfs_flag & VFS_UNMOUNTED)) {
 742  744                          vfs_unlock(vfsp);
 743  745                          break;
 744  746                  }
 745  747                  VN_HOLD(uvp);
 746  748                  vfs_unlock(vfsp);
 747  749                  VN_RELE(vp);
 748  750                  vp = uvp;
 749  751          }
 750  752  
 751  753          return (vp);
 752  754  }
 753  755  
 754  756  static int
 755  757  vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr)
 756  758  {
 757  759          vattr_t v1attr, v2attr;
 758  760  
 759  761          /*
 760  762           * If we have a device file, check to see if is a cloned open of the
 761  763           * same device.  For self-cloning devices, the major numbers will match.
 762  764           * For devices cloned through the 'clone' driver, the minor number of
 763  765           * the source device will be the same as the major number of the cloned
 764  766           * device.
 765  767           */
 766  768          if ((v1->v_type == VCHR || v1->v_type == VBLK) &&
 767  769              v1->v_type == v2->v_type) {
 768  770                  if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) &&
 769  771                      getmajor(v1->v_rdev) == getmajor(v2->v_rdev))
 770  772                          return (1);
 771  773  
 772  774                  if (spec_is_clone(v1) &&
 773  775                      getmajor(v1->v_rdev) == getminor(v2->v_rdev))
 774  776                          return (1);
 775  777  
 776  778                  if (spec_is_clone(v2) &&
 777  779                      getmajor(v2->v_rdev) == getminor(v1->v_rdev))
 778  780                          return (1);
 779  781          }
 780  782  
 781  783          v1attr.va_mask = v2attr.va_mask = AT_TYPE;
 782  784  
 783  785          /*
 784  786           * This check for symbolic links handles the pseudo-symlinks in procfs.
 785  787           * These particular links have v_type of VDIR, but the attributes have a
 786  788           * type of VLNK.  We need to avoid these links because otherwise if we
 787  789           * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare
 788  790           * as the same vnode.
 789  791           */
 790  792          if (VOP_GETATTR(v1, &v1attr, 0, cr, NULL) != 0 ||
 791  793              VOP_GETATTR(v2, &v2attr, 0, cr, NULL) != 0 ||
 792  794              v1attr.va_type == VLNK || v2attr.va_type == VLNK)
 793  795                  return (0);
 794  796  
 795  797          v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID;
 796  798  
 797  799          if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr, NULL) != 0 ||
 798  800              VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr, NULL) != 0)
 799  801                  return (0);
 800  802  
 801  803          return (v1attr.va_fsid == v2attr.va_fsid &&
 802  804              v1attr.va_nodeid == v2attr.va_nodeid);
 803  805  }
 804  806  
 805  807  
 806  808  /*
 807  809   * Find the entry in the directory corresponding to the target vnode.
 808  810   */
 809  811  int
 810  812  dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf,
 811  813      size_t dlen, dirent64_t **rdp)
 812  814  {
 813  815          size_t dbuflen;
 814  816          struct iovec iov;
 815  817          struct uio uio;
 816  818          int error;
 817  819          int eof;
 818  820          vnode_t *cmpvp;
 819  821          struct dirent64 *dp;
 820  822          pathname_t pnp;
 821  823  
 822  824          ASSERT(dvp->v_type == VDIR);
 823  825  
 824  826          /*
 825  827           * This is necessary because of the strange semantics of VOP_LOOKUP().
 826  828           */
 827  829          bzero(&pnp, sizeof (pnp));
 828  830  
 829  831          eof = 0;
 830  832  
 831  833          uio.uio_iov = &iov;
 832  834          uio.uio_iovcnt = 1;
 833  835          uio.uio_segflg = UIO_SYSSPACE;
 834  836          uio.uio_fmode = 0;
 835  837          uio.uio_extflg = UIO_COPY_CACHED;
 836  838          uio.uio_loffset = 0;
 837  839  
 838  840          if ((error = VOP_ACCESS(dvp, VREAD, 0, cr, NULL)) != 0)
 839  841                  return (error);
 840  842  
 841  843          while (!eof) {
 842  844                  uio.uio_resid = dlen;
 843  845                  iov.iov_base = dbuf;
 844  846                  iov.iov_len = dlen;
 845  847  
 846  848                  (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
 847  849                  error = VOP_READDIR(dvp, &uio, cr, &eof, NULL, 0);
 848  850                  VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
 849  851  
 850  852                  dbuflen = dlen - uio.uio_resid;
 851  853  
 852  854                  if (error || dbuflen == 0)
 853  855                          break;
 854  856  
 855  857                  dp = (dirent64_t *)dbuf;
 856  858                  while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
 857  859                          /*
 858  860                           * Ignore '.' and '..' entries
 859  861                           */
 860  862                          if (strcmp(dp->d_name, ".") == 0 ||
 861  863                              strcmp(dp->d_name, "..") == 0) {
 862  864                                  dp = (dirent64_t *)((intptr_t)dp +
 863  865                                      dp->d_reclen);
 864  866                                  continue;
 865  867                          }
 866  868  
 867  869                          error = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0,
 868  870                              vrootp, cr, NULL, NULL, NULL);
 869  871  
 870  872                          /*
 871  873                           * We only want to bail out if there was an error other
 872  874                           * than ENOENT.  Otherwise, it could be that someone
 873  875                           * just removed an entry since the readdir() call, and
 874  876                           * the entry we want is further on in the directory.
 875  877                           */
 876  878                          if (error == 0) {
 877  879                                  if (vnode_match(tvp, cmpvp, cr)) {
 878  880                                          VN_RELE(cmpvp);
 879  881                                          *rdp = dp;
 880  882                                          return (0);
 881  883                                  }
 882  884  
 883  885                                  VN_RELE(cmpvp);
 884  886                          } else if (error != ENOENT) {
 885  887                                  return (error);
 886  888                          }
 887  889  
 888  890                          dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
 889  891                  }
 890  892          }
 891  893  
 892  894          /*
 893  895           * Something strange has happened, this directory does not contain the
 894  896           * specified vnode.  This should never happen in the normal case, since
 895  897           * we ensured that dvp is the parent of vp.  This is possible in some
 896  898           * rare conditions (races and the special .zfs directory).
 897  899           */
 898  900          if (error == 0) {
 899  901                  error = VOP_LOOKUP(dvp, ".zfs", &cmpvp, &pnp, 0, vrootp, cr,
 900  902                      NULL, NULL, NULL);
 901  903                  if (error == 0) {
 902  904                          if (vnode_match(tvp, cmpvp, cr)) {
 903  905                                  (void) strcpy(dp->d_name, ".zfs");
 904  906                                  dp->d_reclen = strlen(".zfs");
 905  907                                  dp->d_off = 2;
 906  908                                  dp->d_ino = 1;
 907  909                                  *rdp = dp;
 908  910                          } else {
 909  911                                  error = ENOENT;
 910  912                          }
 911  913                          VN_RELE(cmpvp);
 912  914                  }
 913  915          }
 914  916  
 915  917          return (error);
 916  918  }
 917  919  
 918  920  /*
 919  921   * Given a global path (from rootdir), and a vnode that is the current root,
 920  922   * return the portion of the path that is beneath the current root or NULL on
 921  923   * failure.  The path MUST be a resolved path (no '..' entries or symlinks),
 922  924   * otherwise this function will fail.
 923  925   */
 924  926  static char *
 925  927  localpath(char *path, struct vnode *vrootp, cred_t *cr)
 926  928  {
 927  929          vnode_t *vp;
 928  930          vnode_t *cvp;
 929  931          char component[MAXNAMELEN];
 930  932          char *ret = NULL;
 931  933          pathname_t pn;
 932  934  
 933  935          /*
 934  936           * We use vn_compare() instead of VN_CMP() in order to detect lofs
 935  937           * mounts and stacked vnodes.
 936  938           */
 937  939          if (vn_compare(vrootp, rootdir))
 938  940                  return (path);
 939  941  
 940  942          if (pn_get(path, UIO_SYSSPACE, &pn) != 0)
 941  943                  return (NULL);
 942  944  
 943  945          vp = rootdir;
 944  946          VN_HOLD(vp);
 945  947  
 946  948          if (vn_ismntpt(vp) && traverse(&vp) != 0) {
 947  949                  VN_RELE(vp);
 948  950                  pn_free(&pn);
 949  951                  return (NULL);
 950  952          }
 951  953  
 952  954          while (pn_pathleft(&pn)) {
 953  955                  pn_skipslash(&pn);
 954  956  
 955  957                  if (pn_getcomponent(&pn, component) != 0)
 956  958                          break;
 957  959  
 958  960                  if (VOP_LOOKUP(vp, component, &cvp, &pn, 0, rootdir, cr,
 959  961                      NULL, NULL, NULL) != 0)
 960  962                          break;
 961  963                  VN_RELE(vp);
 962  964                  vp = cvp;
 963  965  
 964  966                  if (vn_ismntpt(vp) && traverse(&vp) != 0)
 965  967                          break;
 966  968  
 967  969                  if (vn_compare(vp, vrootp)) {
 968  970                          ret = path + (pn.pn_path - pn.pn_buf);
 969  971                          break;
 970  972                  }
 971  973          }
 972  974  
 973  975          VN_RELE(vp);
 974  976          pn_free(&pn);
 975  977  
 976  978          return (ret);
 977  979  }
 978  980  
 979  981  /*
 980  982   * Given a directory, return the full, resolved path.  This looks up "..",
 981  983   * searches for the given vnode in the parent, prepends the component, etc.  It
 982  984   * is used to implement vnodetopath() and getcwd() when the cached path fails.
            *
            * The path is assembled right-to-left: the terminating NUL is placed in
            * the last byte of buf and each component found is copied in front of
            * what is already there.  Once the walk is complete the string is moved
            * to the start of buf.
            *
            *      vrootp          root the result is relative to; the walk ends
            *                      when vp compares equal (VN_CMP) to it
            *      vp              directory to resolve (must be VDIR).  The
            *                      function takes and drops its own hold, so the
            *                      caller's reference is not consumed.
            *      buf, buflen     output buffer; buflen must be at least 2
            *      flags           passed to lookuppnvp(); with LOOKUP_CHECKREAD
            *                      set, VREAD as well as VEXEC is required on each
            *                      parent directory
            *      cr              credentials for lookups and access checks
            *
            * Returns 0 on success, otherwise an errno value:
            *      ENAMETOOLONG    buflen < 2, or a component does not fit
            *      ERANGE          a path taken from one of the shortcuts does
            *                      not fit
            *      EPERM           rootdir was reached before vrootp
            *      ENOTDIR         a parent turned out not to be a directory
            *      ENOENT          ".." resolved to the vnode itself, or ESTALE
            *                      was remapped (see the comment at out:)
            * or whatever VOP_LOOKUP(), VOP_ACCESS() or dirfindvp() returned.
 983  985   */
 984  986  static int
 985  987  dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
 986  988      cred_t *cr)
 987  989  {
 988  990          pathname_t pn, rpn, emptypn;
 989  991          vnode_t *cmpvp, *pvp = NULL;
 990  992          vnode_t *startvp = vp;
 991  993          int err = 0, vprivs;
 992  994          size_t complen;
 993  995          char *dbuf;
 994  996          dirent64_t *dp;
 995  997          char            *bufloc;
 996  998          size_t          dlen = DIRENT64_RECLEN(MAXPATHLEN);
 997  999          refstr_t        *mntpt;
 998 1000  
 999 1001          /* Operation only allowed on directories */
1000 1002          ASSERT(vp->v_type == VDIR);
1001 1003  
1002 1004          /* We must have at least enough space for "/" */
1003 1005          if (buflen < 2)
1004 1006                  return (ENAMETOOLONG);
1005 1007  
1006 1008          /* Start at end of string with terminating null */
1007 1009          bufloc = &buf[buflen - 1];
1008 1010          *bufloc = '\0';
1009 1011  
                   /*
                    * pn holds a candidate path to verify, rpn receives the resolved
                    * path from lookuppnvp(), dbuf is scratch space for a directory
                    * entry name, and emptypn is the zeroed pathname handed to
                    * VOP_LOOKUP() for "..".
                    */
1010 1012          pn_alloc(&pn);
1011 1013          pn_alloc(&rpn);
1012 1014          dbuf = kmem_alloc(dlen, KM_SLEEP);
1013 1015          bzero(&emptypn, sizeof (emptypn));
1014 1016  
1015 1017          /*
1016 1018           * Begin with an additional reference on vp.  This will be decremented
1017 1019           * during the loop.
1018 1020           */
1019 1021          VN_HOLD(vp);
1020 1022  
1021 1023          for (;;) {
1022 1024                  /*
1023 1025                   * Return if we've reached the root.  If the buffer is empty,
1024 1026                   * return '/'.  We explicitly don't use vn_compare(), since it
1025 1027                   * compares the real vnodes.  A lofs mount of '/' would produce
1026 1028                   * incorrect results otherwise.
1027 1029                   */
1028 1030                  if (VN_CMP(vrootp, vp)) {
1029 1031                          if (*bufloc == '\0')
1030 1032                                  *--bufloc = '/';
1031 1033                          break;
1032 1034                  }
1033 1035  
1034 1036                  /*
1035 1037                   * If we've reached the VFS root, something has gone wrong.  We
1036 1038                   * should have reached the root in the above check.  The only
1037 1039                   * explanation is that 'vp' is not contained within the given
1038 1040                   * root, in which case we return EPERM.
1039 1041                   */
1040 1042                  if (VN_CMP(rootdir, vp)) {
1041 1043                          err = EPERM;
1042 1044                          goto out;
1043 1045                  }
1044 1046  
1045 1047                  /*
1046 1048                   * Shortcut: see if this vnode is a mountpoint.  If so,
1047 1049                   * grab the path information from the vfs_t.
1048 1050                   */
1049 1051                  if (vp->v_flag & VROOT) {
1050 1052  
1051 1053                          mntpt = vfs_getmntpoint(vp->v_vfsp);
1052 1054                          if ((err = pn_set(&pn, (char *)refstr_value(mntpt)))
1053 1055                              == 0) {
1054 1056                                  refstr_rele(mntpt);
1055 1057                                  rpn.pn_path = rpn.pn_buf;
1056 1058  
1057 1059                                  /*
1058 1060                                   * Ensure the mountpoint still exists.
                                            *
                                            * vrootp is handed to lookuppnvp() as
                                            * both the root and the starting vnode.
                                            * Its cleanup code releases the vnode
                                            * it is working on and, when the root
                                            * is not rootdir, the root as well, so
                                            * take a hold for each role.
1059 1061                                   */
1060 1062                                  VN_HOLD(vrootp);
1061 1063                                  if (vrootp != rootdir)
1062 1064                                          VN_HOLD(vrootp);
1063 1065                                  if (lookuppnvp(&pn, &rpn, flags, NULL,
1064 1066                                      &cmpvp, vrootp, vrootp, cr) == 0) {
1065 1067  
1066 1068                                          if (VN_CMP(vp, cmpvp)) {
1067 1069                                                  VN_RELE(cmpvp);
1068 1070  
                                                           /*
                                                            * The resolved path is the
                                                            * whole remaining prefix:
                                                            * put it in front of what
                                                            * we have and stop walking.
                                                            */
1069 1071                                                  complen = strlen(rpn.pn_path);
1070 1072                                                  bufloc -= complen;
1071 1073                                                  if (bufloc < buf) {
1072 1074                                                          err = ERANGE;
1073 1075                                                          goto out;
1074 1076                                                  }
1075 1077                                                  bcopy(rpn.pn_path, bufloc,
1076 1078                                                      complen);
1077 1079                                                  break;
1078 1080                                          } else {
1079 1081                                                  VN_RELE(cmpvp);
1080 1082                                          }
1081 1083                                  }
1082 1084                          } else {
1083 1085                                  refstr_rele(mntpt);
1084 1086                          }
1085 1087                  }
1086 1088  
1087 1089                  /*
1088 1090                   * Shortcut: see if this vnode has correct v_path. If so,
1089 1091                   * we have the work done.
                            *
                            * v_path is read under v_lock; the lock is dropped as soon
                            * as the string has been copied into pn (or found absent).
1090 1092                   */
1091 1093                  mutex_enter(&vp->v_lock);
1092 1094                  if (vp->v_path != NULL) {
1093 1095  
1094 1096                          if ((err = pn_set(&pn, vp->v_path)) == 0) {
1095 1097                                  mutex_exit(&vp->v_lock);
1096 1098                                  rpn.pn_path = rpn.pn_buf;
1097 1099  
1098 1100                                  /*
1099 1101                                   * Ensure the v_path points to the correct
                                            * vnode.  The holds on vrootp are taken for
                                            * the same reason as in the mountpoint
                                            * shortcut above.
1100 1102                                   */
1101 1103                                  VN_HOLD(vrootp);
1102 1104                                  if (vrootp != rootdir)
1103 1105                                          VN_HOLD(vrootp);
1104 1106                                  if (lookuppnvp(&pn, &rpn, flags, NULL,
1105 1107                                      &cmpvp, vrootp, vrootp, cr) == 0) {
1106 1108  
1107 1109                                          if (VN_CMP(vp, cmpvp)) {
1108 1110                                                  VN_RELE(cmpvp);
1109 1111  
1110 1112                                                  complen = strlen(rpn.pn_path);
1111 1113                                                  bufloc -= complen;
1112 1114                                                  if (bufloc < buf) {
1113 1115                                                          err = ERANGE;
1114 1116                                                          goto out;
1115 1117                                                  }
1116 1118                                                  bcopy(rpn.pn_path, bufloc,
1117 1119                                                      complen);
1118 1120                                                  break;
1119 1121                                          } else {
1120 1122                                                  VN_RELE(cmpvp);
1121 1123                                          }
1122 1124                                  }
1123 1125                          } else {
1124 1126                                  mutex_exit(&vp->v_lock);
1125 1127                          }
1126 1128                  } else {
1127 1129                          mutex_exit(&vp->v_lock);
1128 1130                  }
1129 1131  
1130 1132                  /*
1131 1133                   * Shortcuts failed, search for this vnode in its parent.  If
1132 1134                   * this is a mountpoint, then get the vnode underneath.
                            * vn_under() trades our hold on vp for a hold on the vnode
                            * it returns.
1133 1135                   */
1134 1136                  if (vp->v_flag & VROOT)
1135 1137                          vp = vn_under(vp);
1136 1138                  if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr,
1137 1139                      NULL, NULL, NULL)) != 0)
1138 1140                          goto out;
1139 1141  
1140 1142                  /*
1141 1143                   * With extended attributes, it's possible for a directory to
1142 1144                   * have a parent that is a regular file.  Check for that here.
1143 1145                   */
1144 1146                  if (pvp->v_type != VDIR) {
1145 1147                          err = ENOTDIR;
1146 1148                          goto out;
1147 1149                  }
1148 1150  
1149 1151                  /*
1150 1152                   * If this is true, something strange has happened.  This is
1151 1153                   * only true if we are the root of a filesystem, which should
1152 1154                   * have been caught by the check above.
1153 1155                   */
1154 1156                  if (VN_CMP(pvp, vp)) {
1155 1157                          err = ENOENT;
1156 1158                          goto out;
1157 1159                  }
1158 1160  
1159 1161                  /*
1160 1162                   * Check if we have read and search privilege so that
1161 1163                   * we can lookup the path in the directory
1162 1164                   */
1163 1165                  vprivs = (flags & LOOKUP_CHECKREAD) ? VREAD | VEXEC : VEXEC;
1164 1166                  if ((err = VOP_ACCESS(pvp, vprivs, 0, cr, NULL)) != 0) {
1165 1167                          goto out;
1166 1168                  }
1167 1169  
1168 1170                  /*
1169 1171                   * Try to obtain the path component from dnlc cache
1170 1172                   * before searching through the directory.
1171 1173                   */
1172 1174                  if ((cmpvp = dnlc_reverse_lookup(vp, dbuf, dlen)) != NULL) {
1173 1175                          /*
1174 1176                           * If we got parent vnode as a result,
1175 1177                           * then the answered path is correct.
1176 1178                           */
1177 1179                          if (VN_CMP(cmpvp, pvp)) {
1178 1180                                  VN_RELE(cmpvp);
1179 1181                                  complen = strlen(dbuf);
1180 1182                                  bufloc -= complen;
                                           /*
                                            * "<=" rather than "<": one more byte is
                                            * needed for the '/' prepended below.
                                            */
1181 1183                                  if (bufloc <= buf) {
1182 1184                                          err = ENAMETOOLONG;
1183 1185                                          goto out;
1184 1186                                  }
1185 1187                                  bcopy(dbuf, bufloc, complen);
1186 1188  
1187 1189                                  /* Prepend a slash to the current path */
1188 1190                                  *--bufloc = '/';
1189 1191  
1190 1192                                  /*
                                            * And continue with the next component.
                                            * The hold on pvp is inherited by vp; pvp
                                            * is cleared so that the cleanup at out:
                                            * does not release it a second time.
                                            */
1191 1193                                  VN_RELE(vp);
1192 1194                                  vp = pvp;
1193 1195                                  pvp = NULL;
1194 1196                                  continue;
1195 1197                          } else {
1196 1198                                  VN_RELE(cmpvp);
1197 1199                          }
1198 1200                  }
1199 1201  
1200 1202                  /*
1201 1203                   * Search the parent directory for the entry corresponding to
1202 1204                   * this vnode.
1203 1205                   */
1204 1206                  if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp))
1205 1207                      != 0)
1206 1208                          goto out;
1207 1209                  complen = strlen(dp->d_name);
1208 1210                  bufloc -= complen;
                           /* As above, leave room for the leading '/'. */
1209 1211                  if (bufloc <= buf) {
1210 1212                          err = ENAMETOOLONG;
1211 1213                          goto out;
1212 1214                  }
1213 1215                  bcopy(dp->d_name, bufloc, complen);
1214 1216  
1215 1217                  /* Prepend a slash to the current path.  */
1216 1218                  *--bufloc = '/';
1217 1219  
1218 1220                  /* And continue with the next component */
1219 1221                  VN_RELE(vp);
1220 1222                  vp = pvp;
1221 1223                  pvp = NULL;
1222 1224          }
1223 1225  
1224 1226          /*
1225 1227           * Place the path at the beginning of the buffer.  The length covers
                    * everything from bufloc to the end of buf, i.e. the path and its
                    * terminating NUL.  Source and destination lie in the same buffer
                    * and may overlap.
1226 1228           */
1227 1229          if (bufloc != buf)
1228 1230                  ovbcopy(bufloc, buf, buflen - (bufloc - buf));
1229 1231  
1230 1232  out:
1231 1233          /*
1232 1234           * If the error was ESTALE and the current directory to look in
1233 1235           * was the root for this lookup, the root for a mounted file
1234 1236           * system, or the starting directory for lookups, then
1235 1237           * return ENOENT instead of ESTALE.  In this case, no recovery
1236 1238           * is possible by the higher level.  If ESTALE was returned for
1237 1239           * some intermediate directory along the path, then recovery
1238 1240           * is potentially possible and retrying from the higher level
1239 1241           * will either correct the situation by purging stale cache
1240 1242           * entries or eventually get back to the point where no recovery
1241 1243           * is possible.
1242 1244           */
1243 1245          if (err == ESTALE &&
1244 1246              (VN_CMP(vp, vrootp) || (vp->v_flag & VROOT) || vp == startvp))
1245 1247                  err = ENOENT;
1246 1248  
                   /*
                    * Common cleanup for success and failure: free the scratch buffer,
                    * drop the hold on the vnode the walk ended at (and on its parent,
                    * if one is still held), and free both pathnames.
                    */
1247 1249          kmem_free(dbuf, dlen);
1248 1250          VN_RELE(vp);
1249 1251          if (pvp)
1250 1252                  VN_RELE(pvp);
1251 1253          pn_free(&pn);
1252 1254          pn_free(&rpn);
1253 1255  
1254 1256          return (err);
1255 1257  }
1256 1258  
1257 1259  /*
1258 1260   * The additional flag, LOOKUP_CHECKREAD, is used to enforce artificial
1259 1261   * constraints in order to be standards compliant.  For example, if we have
1260 1262   * the cached path of '/foo/bar', and '/foo' has permissions 100 (execute
1261 1263   * only), then we can legitimately look up the path to the current working
1262 1264   * directory without needing read permission.  Existing standards tests,
1263 1265   * however, assume that we are determining the path by repeatedly looking up
1264 1266   * "..".  We need to keep this behavior in order to maintain backwards
1265 1267   * compatibility.
1266 1268   */
1267 1269  static int
1268 1270  vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
1269 1271      cred_t *cr, int flags)
1270 1272  {
1271 1273          pathname_t pn, rpn;
1272 1274          int ret, len;
1273 1275          vnode_t *compvp, *pvp, *realvp;
1274 1276          proc_t *p = curproc;
1275 1277          char path[MAXNAMELEN];
1276 1278          int doclose = 0;
1277 1279  
1278 1280          /*
1279 1281           * If vrootp is NULL, get the root for curproc.  Callers with any other
1280 1282           * requirements should pass in a different vrootp.
1281 1283           */
1282 1284          if (vrootp == NULL) {
1283 1285                  mutex_enter(&p->p_lock);
1284 1286                  if ((vrootp = PTOU(p)->u_rdir) == NULL)
1285 1287                          vrootp = rootdir;
1286 1288                  VN_HOLD(vrootp);
1287 1289                  mutex_exit(&p->p_lock);
1288 1290          } else {
1289 1291                  VN_HOLD(vrootp);
1290 1292          }
1291 1293  
1292 1294          /*
1293 1295           * This is to get around an annoying artifact of the /proc filesystem,
1294 1296           * which is the behavior of {cwd/root}.  Trying to resolve this path
1295 1297           * will result in /proc/pid/cwd instead of whatever the real working
1296 1298           * directory is.  We can't rely on VOP_REALVP(), since that will break
1297 1299           * lofs.  The only difference between procfs and lofs is that opening
1298 1300           * the file will return the underlying vnode in the case of procfs.
1299 1301           */
1300 1302          if (vp->v_type == VDIR && VOP_REALVP(vp, &realvp, NULL) == 0 &&
1301 1303              realvp != vp) {
1302 1304                  VN_HOLD(vp);
1303 1305                  if (VOP_OPEN(&vp, FREAD, cr, NULL) == 0)
1304 1306                          doclose = 1;
1305 1307                  else
1306 1308                          VN_RELE(vp);
1307 1309          }
1308 1310  
1309 1311          pn_alloc(&pn);
1310 1312  
1311 1313          /*
1312 1314           * Check to see if we have a cached path in the vnode.
1313 1315           */
1314 1316          mutex_enter(&vp->v_lock);
1315 1317          if (vp->v_path != NULL) {
1316 1318                  (void) pn_set(&pn, vp->v_path);
1317 1319                  mutex_exit(&vp->v_lock);
1318 1320  
1319 1321                  pn_alloc(&rpn);
1320 1322  
1321 1323                  /* We should only cache absolute paths */
1322 1324                  ASSERT(pn.pn_buf[0] == '/');
1323 1325  
1324 1326                  /*
1325 1327                   * If we are in a zone or a chroot environment, then we have to
1326 1328                   * take additional steps, since the path to the root might not
1327 1329                   * be readable with the current credentials, even though the
1328 1330                   * process can legitimately access the file.  In this case, we
1329 1331                   * do the following:
1330 1332                   *
1331 1333                   * lookuppnvp() with all privileges to get the resolved path.
1332 1334                   * call localpath() to get the local portion of the path, and
1333 1335                   * continue as normal.
1334 1336                   *
1335 1337                   * If the conversion to a local path fails, then we continue
1336 1338                   * as normal.  This is a heuristic to make process object file
1337 1339                   * paths available from within a zone.  Because lofs doesn't
1338 1340                   * support page operations, the vnode stored in the seg_t is
1339 1341                   * actually the underlying real vnode, not the lofs node itself.
1340 1342                   * Most of the time, the lofs path is the same as the underlying
1341 1343                   * vnode (for example, /usr/lib/libc.so.1).
1342 1344                   */
1343 1345                  if (vrootp != rootdir) {
1344 1346                          char *local = NULL;
1345 1347                          VN_HOLD(rootdir);
1346 1348                          if (lookuppnvp(&pn, &rpn, FOLLOW,
1347 1349                              NULL, &compvp, rootdir, rootdir, kcred) == 0) {
1348 1350                                  local = localpath(rpn.pn_path, vrootp,
1349 1351                                      kcred);
1350 1352                                  VN_RELE(compvp);
1351 1353                          }
1352 1354  
1353 1355                          /*
1354 1356                           * The original pn was changed through lookuppnvp().
1355 1357                           * Set it to local for next validation attempt.
1356 1358                           */
1357 1359                          if (local) {
1358 1360                                  (void) pn_set(&pn, local);
1359 1361                          } else {
1360 1362                                  goto notcached;
1361 1363                          }
1362 1364                  }
1363 1365  
1364 1366                  /*
1365 1367                   * We should have a local path at this point, so start the
1366 1368                   * search from the root of the current process.
1367 1369                   */
1368 1370                  VN_HOLD(vrootp);
1369 1371                  if (vrootp != rootdir)
1370 1372                          VN_HOLD(vrootp);
1371 1373                  ret = lookuppnvp(&pn, &rpn, FOLLOW | flags, NULL,
1372 1374                      &compvp, vrootp, vrootp, cr);
1373 1375                  if (ret == 0) {
1374 1376                          /*
1375 1377                           * Check to see if the returned vnode is the same as
1376 1378                           * the one we expect.  If not, give up.
1377 1379                           */
1378 1380                          if (!vn_compare(vp, compvp) &&
1379 1381                              !vnode_match(vp, compvp, cr)) {
1380 1382                                  VN_RELE(compvp);
1381 1383                                  goto notcached;
1382 1384                          }
1383 1385  
1384 1386                          VN_RELE(compvp);
1385 1387  
1386 1388                          /*
1387 1389                           * Return the result.
1388 1390                           */
1389 1391                          if (buflen <= rpn.pn_pathlen)
1390 1392                                  goto notcached;
1391 1393  
1392 1394                          bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
1393 1395                          pn_free(&pn);
1394 1396                          pn_free(&rpn);
1395 1397                          VN_RELE(vrootp);
1396 1398                          if (doclose) {
1397 1399                                  (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL);
1398 1400                                  VN_RELE(vp);
1399 1401                          }
1400 1402                          return (0);
  
    | 
      ↓ open down ↓ | 
    1331 lines elided | 
    
      ↑ open up ↑ | 
  
1401 1403                  }
1402 1404  
1403 1405  notcached:
1404 1406                  pn_free(&rpn);
1405 1407          } else {
1406 1408                  mutex_exit(&vp->v_lock);
1407 1409          }
1408 1410  
1409 1411          pn_free(&pn);
1410 1412  
1411      -        if (vp->v_type != VDIR) {
     1413 +        if (PROC_IS_BRANDED(curproc)) {
1412 1414                  /*
     1415 +                 * If v_path doesn't work out and we're in a branded zone,
     1416 +                 * we're not going to bother doing more work here:  because
     1417 +                 * directories from the global can be lofs mounted into odd
     1418 +                 * locations (e.g., /native in an lx zone), it is likely that
     1419 +                 * the DNLC reverse lookup will yield nothing.  Indeed, the
     1420 +                 * only certainty is that the DNLC reverse lookup will be
     1421 +                 * exceedingly painful; we save ourselves the substantial
     1422 +                 * grief of scanning the entire DNLC and kick out with ENOENT
     1423 +                 * in this case.
     1424 +                 */
     1425 +                ret = ENOENT;
     1426 +        } else if (vp->v_type != VDIR) {
     1427 +                /*
1413 1428                   * If we don't have a directory, try to find it in the dnlc via
1414 1429                   * reverse lookup.  Once this is found, we can use the regular
1415 1430                   * directory search to find the full path.
1416 1431                   */
1417 1432                  if ((pvp = dnlc_reverse_lookup(vp, path, MAXNAMELEN)) != NULL) {
1418 1433                          /*
1419 1434                           * Check if we have read privilege so that
1420 1435                           * we can lookup the path in the directory
1421 1436                           */
1422 1437                          ret = 0;
1423 1438                          if ((flags & LOOKUP_CHECKREAD)) {
1424 1439                                  ret = VOP_ACCESS(pvp, VREAD, 0, cr, NULL);
1425 1440                          }
1426 1441                          if (ret == 0) {
1427 1442                                  ret = dirtopath(vrootp, pvp, buf, buflen,
1428 1443                                      flags, cr);
1429 1444                          }
1430 1445                          if (ret == 0) {
1431 1446                                  len = strlen(buf);
1432 1447                                  if (len + strlen(path) + 1 >= buflen) {
1433 1448                                          ret = ENAMETOOLONG;
1434 1449                                  } else {
1435 1450                                          if (buf[len - 1] != '/')
1436 1451                                                  buf[len++] = '/';
1437 1452                                          bcopy(path, buf + len,
1438 1453                                              strlen(path) + 1);
1439 1454                                  }
1440 1455                          }
1441 1456  
1442 1457                          VN_RELE(pvp);
1443 1458                  } else
1444 1459                          ret = ENOENT;
1445 1460          } else
1446 1461                  ret = dirtopath(vrootp, vp, buf, buflen, flags, cr);
1447 1462  
1448 1463          VN_RELE(vrootp);
1449 1464          if (doclose) {
1450 1465                  (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL);
1451 1466                  VN_RELE(vp);
1452 1467          }
1453 1468  
1454 1469          return (ret);
1455 1470  }
1456 1471  
1457 1472  int
1458 1473  vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
1459 1474  {
                   /*
                    * Convenience wrapper: forwards every argument unchanged to
                    * vnodetopath_common() with a flags value of 0 and returns its
                    * result.  With no flags set, LOOKUP_CHECKREAD is not requested,
                    * so the common routine does not perform the VREAD access check
                    * on the parent directory found by DNLC reverse lookup.
                    */
1460 1475          return (vnodetopath_common(vrootp, vp, buf, buflen, cr, 0));
1461 1476  }
1462 1477  
1463 1478  int
1464 1479  dogetcwd(char *buf, size_t buflen)
1465 1480  {
                   /*
                    * Copy the path of curproc's current directory (u_cdir) into
                    * buf, which holds buflen bytes.  The string cached in u_cwd is
                    * used when it still resolves to that directory; otherwise the
                    * path is rebuilt with vnodetopath_common() and u_cwd is
                    * replaced with the new result.  Returns 0 on success or a
                    * non-zero error code (for example ENAMETOOLONG when the cached
                    * string plus its terminator does not fit in buflen).
                    */
1466 1481          int ret;
1467 1482          vnode_t *vp;
1468 1483          vnode_t *compvp;
1469 1484          refstr_t *cwd, *oldcwd;
1470 1485          const char *value;
1471 1486          pathname_t rpnp, pnp;
1472 1487          proc_t *p = curproc;
1473 1488  
1474 1489          /*
1475 1490           * Check to see if there is a cached version of the cwd.  If so, lookup
1476 1491           * the cached value and make sure it is the same vnode.
1477 1492           */
                   /*
                    * Both the cached string (if any) and the directory vnode are
                    * held while p_lock is owned; every return path below drops
                    * these two holds.
                    */
1478 1493          mutex_enter(&p->p_lock);
1479 1494          if ((cwd = PTOU(p)->u_cwd) != NULL)
1480 1495                  refstr_hold(cwd);
1481 1496          vp = PTOU(p)->u_cdir;
1482 1497          VN_HOLD(vp);
1483 1498          mutex_exit(&p->p_lock);
1484 1499  
1485 1500          /*
1486 1501           * Make sure we have permission to access the current directory.
1487 1502           */
1488 1503          if ((ret = VOP_ACCESS(vp, VEXEC, 0, CRED(), NULL)) != 0) {
1489 1504                  if (cwd != NULL)
1490 1505                          refstr_rele(cwd);
1491 1506                  VN_RELE(vp);
1492 1507                  return (ret);
1493 1508          }
1494 1509  
1495 1510          if (cwd) {
1496 1511                  value = refstr_value(cwd);
1497 1512                  if ((ret = pn_get((char *)value, UIO_SYSSPACE, &pnp)) != 0) {
1498 1513                          refstr_rele(cwd);
1499 1514                          VN_RELE(vp);
1500 1515                          return (ret);
1501 1516                  }
1502 1517  
1503 1518                  pn_alloc(&rpnp);
1504 1519  
                           /*
                            * The cached string is trusted only if looking it up
                            * (without following a trailing symlink) yields the same
                            * vnode as u_cdir and the resolved path in rpnp is
                            * identical to the cached string.
                            */
1505 1520                  if (lookuppn(&pnp, &rpnp, NO_FOLLOW, NULL, &compvp) == 0) {
1506 1521  
1507 1522                          if (VN_CMP(vp, compvp) &&
1508 1523                              strcmp(value, rpnp.pn_path) == 0) {
1509 1524                                  VN_RELE(compvp);
1510 1525                                  VN_RELE(vp);
1511 1526                                  pn_free(&pnp);
1512 1527                                  pn_free(&rpnp);
                                           /*
                                            * The "+ 1" accounts for the terminating
                                            * NUL, which the bcopy() below copies too.
                                            */
1513 1528                                  if (strlen(value) + 1 > buflen) {
1514 1529                                          refstr_rele(cwd);
1515 1530                                          return (ENAMETOOLONG);
1516 1531                                  }
1517 1532                                  bcopy(value, buf, strlen(value) + 1);
1518 1533                                  refstr_rele(cwd);
1519 1534                                  return (0);
1520 1535                          }
1521 1536  
1522 1537                          VN_RELE(compvp);
1523 1538                  }
1524 1539  
                           /* Cached value did not validate; discard the lookup state. */
1525 1540                  pn_free(&rpnp);
1526 1541                  pn_free(&pnp);
1527 1542  
1528 1543                  refstr_rele(cwd);
1529 1544          }
1530 1545  
                   /*
                    * No usable cached path: derive one from the vnode.  Passing a
                    * NULL vrootp makes vnodetopath_common() use curproc's root, and
                    * LOOKUP_CHECKREAD asks it to check VREAD access on the parent
                    * directory it finds via DNLC reverse lookup.
                    */
1531 1546          ret = vnodetopath_common(NULL, vp, buf, buflen, CRED(),
1532 1547              LOOKUP_CHECKREAD);
1533 1548  
1534 1549          VN_RELE(vp);
1535 1550  
1536 1551          /*
1537 1552           * Store the new cwd and replace the existing cached copy.
1538 1553           */
                   /*
                    * On failure cwd is NULL, so any previously cached string is
                    * dropped rather than left in place.
                    */
1539 1554          if (ret == 0)
1540 1555                  cwd = refstr_alloc(buf);
1541 1556          else
1542 1557                  cwd = NULL;
1543 1558  
1544 1559          mutex_enter(&p->p_lock);
1545 1560          oldcwd = PTOU(p)->u_cwd;
1546 1561          PTOU(p)->u_cwd = cwd;
1547 1562          mutex_exit(&p->p_lock);
1548 1563  
                   /* Release the replaced string only after p_lock has been dropped. */
1549 1564          if (oldcwd)
1550 1565                  refstr_rele(oldcwd);
1551 1566  
1552 1567          return (ret);
1553 1568  }
  
    | 
      ↓ open down ↓ | 
    131 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX