1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25  */
  26 
  27 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * University Copyright- Copyright (c) 1982, 1986, 1988
  32  * The Regents of the University of California
  33  * All Rights Reserved
  34  *
  35  * University Acknowledgment- Portions of this document are derived from
  36  * software developed by the University of California, Berkeley, and its
  37  * contributors.
  38  */
  39 
  40 #include <sys/types.h>
  41 #include <sys/param.h>
  42 #include <sys/systm.h>
  43 #include <sys/file.h>
  44 #include <sys/errno.h>
  45 #include <sys/cred.h>
  46 #include <sys/user.h>
  47 #include <sys/uio.h>
  48 #include <sys/vfs.h>
  49 #include <sys/vnode.h>
  50 #include <sys/pathname.h>
  51 #include <sys/proc.h>
  52 #include <sys/vtrace.h>
  53 #include <sys/sysmacros.h>
  54 #include <sys/debug.h>
  55 #include <sys/dirent.h>
  56 #include <sys/zone.h>
  57 #include <sys/dnlc.h>
  58 #include <sys/fs/snode.h>
  59 
  60 /*
  61  * Starting at current directory, translate pathname pnp to end.
  62  * Leave pathname of final component in pnp, return the vnode
  63  * for the final component in *compvpp, and return the vnode
  64  * for the parent of the final component in dirvpp.
  65  *
  66  * This is the central routine in pathname translation and handles
  67  * multiple components in pathnames, separating them at /'s.  It also
  68  * implements mounted file systems and processes symbolic links.
  69  *
  70  * vp is the vnode where the directory search should start.
  71  *
  72  * Reference counts: vp must be held prior to calling this function.  rootvp
  73  * should only be held if rootvp != rootdir.
  74  */
  75 int
  76 lookuppnvp(
  77         struct pathname *pnp,           /* pathname to lookup */
  78         struct pathname *rpnp,          /* if non-NULL, return resolved path */
  79         int flags,                      /* follow symlinks */
  80         vnode_t **dirvpp,               /* ptr for parent vnode */
  81         vnode_t **compvpp,              /* ptr for entry vnode */
  82         vnode_t *rootvp,                /* rootvp */
  83         vnode_t *vp,                    /* directory to start search at */
  84         cred_t *cr)                     /* user's credential */
  85 {
  86         vnode_t *cvp;   /* current component vp */
  87         vnode_t *tvp;   /* addressable temp ptr */
  88         char component[MAXNAMELEN];     /* buffer for component (incl null) */
  89         int error;
  90         int nlink;
  91         int lookup_flags;
  92         struct pathname presrvd; /* case preserved name */
  93         struct pathname *pp = NULL;
  94         vnode_t *startvp;
  95         int must_be_directory = 0;
  96         boolean_t retry_with_kcred;
  97 
  98         nlink = 0;
  99         cvp = NULL;
 100         if (rpnp)
 101                 rpnp->pn_pathlen = 0;
 102 
 103         lookup_flags = dirvpp ? LOOKUP_DIR : 0;
 104         if (flags & FIGNORECASE) {
 105                 lookup_flags |= FIGNORECASE;
 106                 pn_alloc(&presrvd);
 107                 pp = &presrvd;
 108         }
 109 
 110         /*
 111          * Eliminate any trailing slashes in the pathname.
 112          * If there are any, we must follow all symlinks.
 113          * Also, we must guarantee that the last component is a directory.
 114          */
 115         if (pn_fixslash(pnp)) {
 116                 flags |= FOLLOW;
 117                 must_be_directory = 1;
 118         }
 119 
 120         startvp = vp;
 121 next:
 122         retry_with_kcred = B_FALSE;
 123 
 124         /*
 125          * Make sure we have a directory.
 126          */
 127         if (vp->v_type != VDIR) {
 128                 error = ENOTDIR;
 129                 goto bad;
 130         }
 131 
 132         if (rpnp && VN_CMP(vp, rootvp))
 133                 (void) pn_set(rpnp, "/");
 134 
 135         /*
 136          * Process the next component of the pathname.
 137          */
 138         if (error = pn_getcomponent(pnp, component)) {
 139                 goto bad;
 140         }
 141 
 142         /*
 143          * Handle "..": two special cases.
 144          * 1. If we're at the root directory (e.g. after chroot or
 145          *    zone_enter) then change ".." to "." so we can't get
 146          *    out of this subtree.
 147          * 2. If this vnode is the root of a mounted file system,
 148          *    then replace it with the vnode that was mounted on
 149          *    so that we take the ".." in the other file system.
 150          */
 151         if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
 152 checkforroot:
 153                 if (VN_CMP(vp, rootvp)) {
 154                         component[1] = '\0';
 155                 } else if (vp->v_flag & VROOT) {
 156                         vfs_t *vfsp;
 157                         cvp = vp;
 158 
 159                         /*
 160                          * While we deal with the vfs pointer from the vnode
 161                          * the filesystem could have been forcefully unmounted
 162                          * and the vnode's v_vfsp could have been invalidated
 163                          * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
 164                          * with vfs_rlock_wait/vfs_unlock.
 165                          * It is safe to use the v_vfsp even it is freed by
 166                          * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
 167                          * do not dereference v_vfsp. It is just used as a
 168                          * magic cookie.
 169                          * One more corner case here is the memory getting
 170                          * reused for another vfs structure. In this case
 171                          * lookuppnvp's vfs_rlock_wait will succeed, domount's
 172                          * vfs_lock will fail and domount will bail out with an
 173                          * error (EBUSY).
 174                          */
 175                         vfsp = cvp->v_vfsp;
 176 
 177                         /*
 178                          * This lock is used to synchronize
 179                          * mounts/unmounts and lookups.
 180                          * Threads doing mounts/unmounts hold the
 181                          * writers version vfs_lock_wait().
 182                          */
 183 
 184                         vfs_rlock_wait(vfsp);
 185 
 186                         /*
 187                          * If this vnode is on a file system that
 188                          * has been forcibly unmounted,
 189                          * we can't proceed. Cancel this operation
 190                          * and return EIO.
 191                          *
 192                          * vfs_vnodecovered is NULL if unmounted.
 193                          * Currently, nfs uses VFS_UNMOUNTED to
 194                          * check if it's a forced-umount. Keep the
 195                          * same checking here as well even though it
 196                          * may not be needed.
 197                          */
 198                         if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
 199                             (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
 200                                 vfs_unlock(vfsp);
 201                                 VN_RELE(cvp);
 202                                 if (pp)
 203                                         pn_free(pp);
 204                                 return (EIO);
 205                         }
 206                         VN_HOLD(vp);
 207                         vfs_unlock(vfsp);
 208                         VN_RELE(cvp);
 209                         cvp = NULL;
 210                         /*
 211                          * Crossing mount points. For eg: We are doing
 212                          * a lookup of ".." for file systems root vnode
 213                          * mounted here, and VOP_LOOKUP() (with covered vnode)
 214                          * will be on underlying file systems mount point
 215                          * vnode. Set retry_with_kcred flag as we might end
 216                          * up doing VOP_LOOKUP() with kcred if required.
 217                          */
 218                         retry_with_kcred = B_TRUE;
 219                         goto checkforroot;
 220                 }
 221         }
 222 
 223         /*
 224          * Perform a lookup in the current directory.
 225          */
 226         error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
 227             rootvp, cr, NULL, NULL, pp);
 228 
 229         /*
 230          * Retry with kcred - If crossing mount points & error is EACCES.
 231          *
 232          * If we are crossing mount points here and doing ".." lookup,
 233          * VOP_LOOKUP() might fail if the underlying file systems
 234          * mount point has no execute permission. In cases like these,
 235          * we retry VOP_LOOKUP() by giving as much privilage as possible
 236          * by passing kcred credentials.
 237          *
 238          * In case of hierarchical file systems, passing kcred still may
 239          * or may not work.
 240          * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
 241          *                      directory inside NFS FS.
 242          */
 243         if ((error == EACCES) && retry_with_kcred)
 244                 error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
 245                     rootvp, zone_kcred(), NULL, NULL, pp);
 246 
 247         cvp = tvp;
 248         if (error) {
 249                 cvp = NULL;
 250                 /*
 251                  * On error, return hard error if
 252                  * (a) we're not at the end of the pathname yet, or
 253                  * (b) the caller didn't want the parent directory, or
 254                  * (c) we failed for some reason other than a missing entry.
 255                  */
 256                 if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
 257                         goto bad;
 258 
 259                 pn_setlast(pnp);
 260                 /*
 261                  * We inform the caller that the desired entry must be
 262                  * a directory by adding a '/' to the component name.
 263                  */
 264                 if (must_be_directory && (error = pn_addslash(pnp)) != 0)
 265                         goto bad;
 266                 *dirvpp = vp;
 267                 if (compvpp != NULL)
 268                         *compvpp = NULL;
 269                 if (rootvp != rootdir)
 270                         VN_RELE(rootvp);
 271                 if (pp)
 272                         pn_free(pp);
 273                 return (0);
 274         }
 275 
 276         /*
 277          * Traverse mount points.
 278          */
 279         if (vn_mountedvfs(cvp) != NULL) {
 280                 tvp = cvp;
 281                 if ((error = traverse(&tvp)) != 0) {
 282                         /*
 283                          * It is required to assign cvp here, because
 284                          * traverse() will return a held vnode which
 285                          * may different than the vnode that was passed
 286                          * in (even in the error case).  If traverse()
 287                          * changes the vnode it releases the original,
 288                          * and holds the new one.
 289                          */
 290                         cvp = tvp;
 291                         goto bad;
 292                 }
 293                 cvp = tvp;
 294         }
 295 
 296         /*
 297          * If we hit a symbolic link and there is more path to be
 298          * translated or this operation does not wish to apply
 299          * to a link, then place the contents of the link at the
 300          * front of the remaining pathname.
 301          */
 302         if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
 303                 struct pathname linkpath;
 304 
 305                 if (++nlink > MAXSYMLINKS) {
 306                         error = ELOOP;
 307                         goto bad;
 308                 }
 309                 pn_alloc(&linkpath);
 310                 if (error = pn_getsymlink(cvp, &linkpath, cr)) {
 311                         pn_free(&linkpath);
 312                         goto bad;
 313                 }
 314 
 315                 if (pn_pathleft(&linkpath) == 0)
 316                         (void) pn_set(&linkpath, ".");
 317                 error = pn_insert(pnp, &linkpath, strlen(component));
 318                 pn_free(&linkpath);
 319                 if (error)
 320                         goto bad;
 321                 VN_RELE(cvp);
 322                 cvp = NULL;
 323                 if (pnp->pn_pathlen == 0) {
 324                         error = ENOENT;
 325                         goto bad;
 326                 }
 327                 if (pnp->pn_path[0] == '/') {
 328                         do {
 329                                 pnp->pn_path++;
 330                                 pnp->pn_pathlen--;
 331                         } while (pnp->pn_path[0] == '/');
 332                         VN_RELE(vp);
 333                         vp = rootvp;
 334                         VN_HOLD(vp);
 335                 }
 336                 if (pn_fixslash(pnp)) {
 337                         flags |= FOLLOW;
 338                         must_be_directory = 1;
 339                 }
 340                 goto next;
 341         }
 342 
 343         /*
 344          * If rpnp is non-NULL, remember the resolved path name therein.
 345          * Do not include "." components.  Collapse occurrences of
 346          * "previous/..", so long as "previous" is not itself "..".
 347          * Exhausting rpnp results in error ENAMETOOLONG.
 348          */
 349         if (rpnp && strcmp(component, ".") != 0) {
 350                 size_t len;
 351 
 352                 if (strcmp(component, "..") == 0 &&
 353                     rpnp->pn_pathlen != 0 &&
 354                     !((rpnp->pn_pathlen > 2 &&
 355                     strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
 356                     (rpnp->pn_pathlen == 2 &&
 357                     strncmp(rpnp->pn_path, "..", 2) == 0))) {
 358                         while (rpnp->pn_pathlen &&
 359                             rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
 360                                 rpnp->pn_pathlen--;
 361                         if (rpnp->pn_pathlen > 1)
 362                                 rpnp->pn_pathlen--;
 363                         rpnp->pn_path[rpnp->pn_pathlen] = '\0';
 364                 } else {
 365                         if (rpnp->pn_pathlen != 0 &&
 366                             rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
 367                                 rpnp->pn_path[rpnp->pn_pathlen++] = '/';
 368                         if (flags & FIGNORECASE) {
 369                                 /*
 370                                  * Return the case-preserved name
 371                                  * within the resolved path.
 372                                  */
 373                                 error = copystr(pp->pn_buf,
 374                                     rpnp->pn_path + rpnp->pn_pathlen,
 375                                     rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
 376                         } else {
 377                                 error = copystr(component,
 378                                     rpnp->pn_path + rpnp->pn_pathlen,
 379                                     rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
 380                         }
 381                         if (error)      /* copystr() returns ENAMETOOLONG */
 382                                 goto bad;
 383                         rpnp->pn_pathlen += (len - 1);
 384                         ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
 385                 }
 386         }
 387 
 388         /*
 389          * If no more components, return last directory (if wanted) and
 390          * last component (if wanted).
 391          */
 392         if (pn_pathleft(pnp) == 0) {
 393                 /*
 394                  * If there was a trailing slash in the pathname,
 395                  * make sure the last component is a directory.
 396                  */
 397                 if (must_be_directory && cvp->v_type != VDIR) {
 398                         error = ENOTDIR;
 399                         goto bad;
 400                 }
 401                 if (dirvpp != NULL) {
 402                         /*
 403                          * Check that we have the real parent and not
 404                          * an alias of the last component.
 405                          */
 406                         if (vn_compare(vp, cvp)) {
 407                                 pn_setlast(pnp);
 408                                 VN_RELE(vp);
 409                                 VN_RELE(cvp);
 410                                 if (rootvp != rootdir)
 411                                         VN_RELE(rootvp);
 412                                 if (pp)
 413                                         pn_free(pp);
 414                                 return (EINVAL);
 415                         }
 416                         *dirvpp = vp;
 417                 } else
 418                         VN_RELE(vp);
 419                 if (pnp->pn_path == pnp->pn_buf)
 420                         (void) pn_set(pnp, ".");
 421                 else
 422                         pn_setlast(pnp);
 423                 if (rpnp) {
 424                         if (VN_CMP(cvp, rootvp))
 425                                 (void) pn_set(rpnp, "/");
 426                         else if (rpnp->pn_pathlen == 0)
 427                                 (void) pn_set(rpnp, ".");
 428                 }
 429 
 430                 if (compvpp != NULL)
 431                         *compvpp = cvp;
 432                 else
 433                         VN_RELE(cvp);
 434                 if (rootvp != rootdir)
 435                         VN_RELE(rootvp);
 436                 if (pp)
 437                         pn_free(pp);
 438                 return (0);
 439         }
 440 
 441         /*
 442          * Skip over slashes from end of last component.
 443          */
 444         while (pnp->pn_path[0] == '/') {
 445                 pnp->pn_path++;
 446                 pnp->pn_pathlen--;
 447         }
 448 
 449         /*
 450          * Searched through another level of directory:
 451          * release previous directory handle and save new (result
 452          * of lookup) as current directory.
 453          */
 454         VN_RELE(vp);
 455         vp = cvp;
 456         cvp = NULL;
 457         goto next;
 458 
 459 bad:
 460         /*
 461          * Error.  Release vnodes and return.
 462          */
 463         if (cvp)
 464                 VN_RELE(cvp);
 465         /*
 466          * If the error was ESTALE and the current directory to look in
 467          * was the root for this lookup, the root for a mounted file
 468          * system, or the starting directory for lookups, then
 469          * return ENOENT instead of ESTALE.  In this case, no recovery
 470          * is possible by the higher level.  If ESTALE was returned for
 471          * some intermediate directory along the path, then recovery
 472          * is potentially possible and retrying from the higher level
 473          * will either correct the situation by purging stale cache
 474          * entries or eventually get back to the point where no recovery
 475          * is possible.
 476          */
 477         if (error == ESTALE &&
 478             (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
 479                 error = ENOENT;
 480         VN_RELE(vp);
 481         if (rootvp != rootdir)
 482                 VN_RELE(rootvp);
 483         if (pp)
 484                 pn_free(pp);
 485         return (error);
 486 }
 487 
 488 /*
 489  * Traverse a mount point.  Routine accepts a vnode pointer as a reference
 490  * parameter and performs the indirection, releasing the original vnode.
 491  */
 492 int
 493 traverse(vnode_t **cvpp)
 494 {
 495         int error = 0;
 496         vnode_t *cvp;
 497         vnode_t *tvp;
 498         vfs_t *vfsp;
 499 
 500         cvp = *cvpp;
 501 
 502         /*
 503          * If this vnode is mounted on, then we transparently indirect
 504          * to the vnode which is the root of the mounted file system.
 505          * Before we do this we must check that an unmount is not in
 506          * progress on this vnode.
 507          */
 508 
 509         for (;;) {
 510                 /*
 511                  * Used to try to read lock the vnode here.
 512                  */
 513 
 514                 /*
 515                  * Reached the end of the mount chain?
 516                  */
 517                 vfsp = vn_mountedvfs(cvp);
 518                 if (vfsp == NULL) {
 519                         break;
 520                 }
 521 
 522                 /*
 523                  * The read lock must be held across the call to VFS_ROOT() to
 524                  * prevent a concurrent unmount from destroying the vfs.
 525                  */
 526                 error = VFS_ROOT(vfsp, &tvp);
 527                 if (error)
 528                         break;
 529 
 530                 VN_RELE(cvp);
 531 
 532                 cvp = tvp;
 533         }
 534 
 535         *cvpp = cvp;
 536         return (error);
 537 }
 538 
 539 /*
 540  * Get the vnode path, relative to the passed rootvp.
 541  * Our vncache always fills in v_path, so this is easy.
 542  */
 543 /* ARGSUSED */
 544 int
 545 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
 546 {
 547         int len, rvp_len = 0;
 548         const char *p = vp->v_path;
 549 
 550         if (vrootp)
 551                 rvp_len = strlen(vrootp->v_path);
 552         len = strlen(p);
 553         if (rvp_len < len)
 554                 p += rvp_len;
 555         else
 556                 p = "/";
 557 
 558         (void) strlcpy(buf, p, buflen);
 559         return (0);
 560 }