1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 #include <sys/types.h>
  29 #include <sys/param.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/systm.h>
  32 #include <sys/time.h>
  33 #include <sys/vfs.h>
  34 #include <sys/vnode.h>
  35 #include <sys/errno.h>
  36 #include <sys/cmn_err.h>
  37 #include <sys/cred.h>
  38 #include <sys/stat.h>
  39 #include <sys/debug.h>
  40 #include <sys/policy.h>
  41 #include <sys/fs/tmpnode.h>
  42 #include <sys/fs/tmp.h>
  43 #include <sys/vtrace.h>
  44 
  45 static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *);
  46 static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *,
  47         char *, struct tmpnode *, struct tdirent *, struct cred *);
  48 static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
  49 static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
  50         enum de_op, struct tmpnode **, struct cred *);
  51 static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
  52         enum de_op, struct tmpnode *);
  53 
  54 
  55 #define T_HASH_SIZE     8192            /* must be power of 2 */
  56 #define T_MUTEX_SIZE    64
  57 
  58 /* Non-static so compilers won't constant-fold these away. */
  59 clock_t tmpfs_rename_backoff_delay = 1;
  60 unsigned int tmpfs_rename_backoff_tries = 0;
  61 unsigned long tmpfs_rename_loops = 0;
  62 
  63 static struct tdirent   *t_hashtable[T_HASH_SIZE];
  64 static kmutex_t          t_hashmutex[T_MUTEX_SIZE];
  65 
  66 #define T_HASH_INDEX(a)         ((a) & (T_HASH_SIZE-1))
  67 #define T_MUTEX_INDEX(a)        ((a) & (T_MUTEX_SIZE-1))
  68 
  69 #define TMPFS_HASH(tp, name, hash)                              \
  70         {                                                       \
  71                 char Xc, *Xcp;                                  \
  72                 hash = (uint_t)(uintptr_t)(tp) >> 8;              \
  73                 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)     \
  74                         hash = (hash << 4) + hash + (uint_t)Xc;   \
  75         }
  76 
  77 void
  78 tmpfs_hash_init(void)
  79 {
  80         int     ix;
  81 
  82         for (ix = 0; ix < T_MUTEX_SIZE; ix++)
  83                 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
  84 }
  85 
  86 /*
  87  * This routine is where the rubber meets the road for identities.
  88  */
  89 static void
  90 tmpfs_hash_in(struct tdirent *t)
  91 {
  92         uint_t          hash;
  93         struct tdirent  **prevpp;
  94         kmutex_t        *t_hmtx;
  95 
  96         TMPFS_HASH(t->td_parent, t->td_name, hash);
  97         t->td_hash = hash;
  98         prevpp = &t_hashtable[T_HASH_INDEX(hash)];
  99         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
 100         mutex_enter(t_hmtx);
 101         t->td_link = *prevpp;
 102         *prevpp = t;
 103         mutex_exit(t_hmtx);
 104 }
 105 
 106 /*
 107  * Remove tdirent *t from the hash list.
 108  */
 109 static void
 110 tmpfs_hash_out(struct tdirent *t)
 111 {
 112         uint_t          hash;
 113         struct tdirent  **prevpp;
 114         kmutex_t        *t_hmtx;
 115 
 116         hash = t->td_hash;
 117         prevpp = &t_hashtable[T_HASH_INDEX(hash)];
 118         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
 119         mutex_enter(t_hmtx);
 120         while (*prevpp != t)
 121                 prevpp = &(*prevpp)->td_link;
 122         *prevpp = t->td_link;
 123         mutex_exit(t_hmtx);
 124 }
 125 
 126 /*
 127  * Currently called by tdirrename() only.
 128  * rename operation needs to be done with lock held, to ensure that
 129  * no other operations can access the tmpnode at the same instance.
 130  */
 131 static void
 132 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
 133 {
 134         uint_t          hash;
 135         kmutex_t        *t_hmtx;
 136 
 137         hash = tdp->td_hash;
 138         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
 139         mutex_enter(t_hmtx);
 140         tdp->td_tmpnode = fromtp;
 141         mutex_exit(t_hmtx);
 142 }
 143 
 144 static struct tdirent *
 145 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
 146         struct tmpnode **found)
 147 {
 148         struct tdirent  *l;
 149         uint_t          hash;
 150         kmutex_t        *t_hmtx;
 151         struct tmpnode  *tnp;
 152 
 153         TMPFS_HASH(parent, name, hash);
 154         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
 155         mutex_enter(t_hmtx);
 156         l = t_hashtable[T_HASH_INDEX(hash)];
 157         while (l) {
 158                 if ((l->td_hash == hash) &&
 159                     (l->td_parent == parent) &&
 160                     (strcmp(l->td_name, name) == 0)) {
 161                         /*
 162                          * We need to make sure that the tmpnode that
 163                          * we put a hold on is the same one that we pass back.
 164                          * Hence, temporary variable tnp is necessary.
 165                          */
 166                         tnp = l->td_tmpnode;
 167                         if (hold) {
 168                                 ASSERT(tnp);
 169                                 tmpnode_hold(tnp);
 170                         }
 171                         if (found)
 172                                 *found = tnp;
 173                         mutex_exit(t_hmtx);
 174                         return (l);
 175                 } else {
 176                         l = l->td_link;
 177                 }
 178         }
 179         mutex_exit(t_hmtx);
 180         return (NULL);
 181 }
 182 
 183 /*
 184  * Search directory 'parent' for entry 'name'.
 185  *
 186  * The calling thread can't hold the write version
 187  * of the rwlock for the directory being searched
 188  *
 189  * 0 is returned on success and *foundtp points
 190  * to the found tmpnode with its vnode held.
 191  */
 192 int
 193 tdirlookup(
 194         struct tmpnode *parent,
 195         char *name,
 196         struct tmpnode **foundtp,
 197         struct cred *cred)
 198 {
 199         int error;
 200 
 201         *foundtp = NULL;
 202         if (parent->tn_type != VDIR)
 203                 return (ENOTDIR);
 204 
 205         if ((error = tmp_taccess(parent, VEXEC, cred)))
 206                 return (error);
 207 
 208         if (*name == '\0') {
 209                 tmpnode_hold(parent);
 210                 *foundtp = parent;
 211                 return (0);
 212         }
 213 
 214         /*
 215          * Search the directory for the matching name
 216          * We need the lock protecting the tn_dir list
 217          * so that it doesn't change out from underneath us.
 218          * tmpfs_hash_lookup() will pass back the tmpnode
 219          * with a hold on it.
 220          */
 221 
 222         if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
 223                 ASSERT(*foundtp);
 224                 return (0);
 225         }
 226 
 227         return (ENOENT);
 228 }
 229 
 230 /*
 231  * Enter a directory entry for 'name' and 'tp' into directory 'dir'
 232  *
 233  * Returns 0 on success.
 234  */
 235 int
 236 tdirenter(
 237         struct tmount   *tm,
 238         struct tmpnode  *dir,           /* target directory to make entry in */
 239         char            *name,          /* name of entry */
 240         enum de_op      op,             /* entry operation */
 241         struct tmpnode  *fromparent,    /* source directory if rename */
 242         struct tmpnode  *tp,            /* source tmpnode, if link/rename */
 243         struct vattr    *va,
 244         struct tmpnode  **tpp,          /* return tmpnode, if create/mkdir */
 245         struct cred     *cred,
 246         caller_context_t *ctp)
 247 {
 248         struct tdirent *tdp;
 249         struct tmpnode *found = NULL;
 250         int error = 0;
 251         char *s;
 252 
 253         /*
 254          * tn_rwlock is held to serialize direnter and dirdeletes
 255          */
 256         ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
 257         ASSERT(dir->tn_type == VDIR);
 258 
 259         /*
 260          * Don't allow '/' characters in pathname component
 261          * (thus in ufs_direnter()).
 262          */
 263         for (s = name; *s; s++)
 264                 if (*s == '/')
 265                         return (EACCES);
 266 
 267         if (name[0] == '\0')
 268                 panic("tdirenter: NULL name");
 269 
 270         /*
 271          * For link and rename lock the source entry and check the link count
 272          * to see if it has been removed while it was unlocked.
 273          */
 274         if (op == DE_LINK || op == DE_RENAME) {
 275                 if (tp != dir) {
 276                         unsigned int tries = 0;
 277 
 278                         /*
 279                          * If we are acquiring tp->tn_rwlock (for SOURCE)
 280                          * inside here, we must consider the following:
 281                          *
 282                          * - dir->tn_rwlock (TARGET) is already HELD (see
 283                          * above ASSERT()).
 284                          *
 285                          * - It is possible our SOURCE is a parent of our
 286                          * TARGET. Yes it's unusual, but it will return an
 287                          * error below via tdircheckpath().
 288                          *
 289                          * - It is also possible that another thread,
 290                          * concurrent to this one, is performing
 291                          * rmdir(TARGET), which means it will first acquire
 292                          * SOURCE's lock, THEN acquire TARGET's lock, which
 293                          * could result in this thread holding TARGET and
 294                          * trying for SOURCE, but the other thread holding
 295                          * SOURCE and trying for TARGET.  This is deadlock,
 296                          * and it's inducible.
 297                          *
 298                          * To prevent this, we borrow some techniques from UFS
 299                          * and rw_tryenter(), delaying if we fail, and
 300                          * if someone tweaks the number of backoff tries to be
 301                          * nonzero, return EBUSY after that number of tries.
 302                          */
 303                         while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
 304                                 /*
 305                                  * Sloppy, but this is a diagnostic so atomic
 306                                  * increment would be overkill.
 307                                  */
 308                                 tmpfs_rename_loops++;
 309 
 310                                 if (tmpfs_rename_backoff_tries != 0) {
 311                                         if (tries > tmpfs_rename_backoff_tries)
 312                                                 return (EBUSY);
 313                                         tries++;
 314                                 }
 315                                 /*
 316                                  * NOTE: We're still holding dir->tn_rwlock,
 317                                  * so drop it over the delay, so any other
 318                                  * thread can get its business done.
 319                                  *
 320                                  * No state change or state inspection happens
 321                                  * prior to here, so it is not wholly dangerous
 322                                  * to release-and-reacquire dir->tn_rwlock.
 323                                  *
 324                                  * Hold the vnode of dir in case it gets
 325                                  * released by another thread, though.
 326                                  */
 327                                 VN_HOLD(TNTOV(dir));
 328                                 rw_exit(&dir->tn_rwlock);
 329                                 delay(tmpfs_rename_backoff_delay);
 330                                 rw_enter(&dir->tn_rwlock, RW_WRITER);
 331                                 VN_RELE(TNTOV(dir));
 332                         }
 333                 }
 334                 mutex_enter(&tp->tn_tlock);
 335                 if (tp->tn_nlink == 0) {
 336                         mutex_exit(&tp->tn_tlock);
 337                         if (tp != dir)
 338                                 rw_exit(&tp->tn_rwlock);
 339                         return (ENOENT);
 340                 }
 341 
 342                 if (tp->tn_nlink == MAXLINK) {
 343                         mutex_exit(&tp->tn_tlock);
 344                         if (tp != dir)
 345                                 rw_exit(&tp->tn_rwlock);
 346                         return (EMLINK);
 347                 }
 348                 tp->tn_nlink++;
 349                 gethrestime(&tp->tn_ctime);
 350                 mutex_exit(&tp->tn_tlock);
 351                 if (tp != dir)
 352                         rw_exit(&tp->tn_rwlock);
 353         }
 354 
 355         /*
 356          * This might be a "dangling detached directory".
 357          * it could have been removed, but a reference
 358          * to it kept in u_cwd.  don't bother searching
 359          * it, and with any luck the user will get tired
 360          * of dealing with us and cd to some absolute
 361          * pathway.  *sigh*, thus in ufs, too.
 362          */
 363         if (dir->tn_nlink == 0) {
 364                 error = ENOENT;
 365                 goto out;
 366         }
 367 
 368         /*
 369          * If this is a rename of a directory and the parent is
 370          * different (".." must be changed), then the source
 371          * directory must not be in the directory hierarchy
 372          * above the target, as this would orphan everything
 373          * below the source directory.
 374          */
 375         if (op == DE_RENAME) {
 376                 if (tp == dir) {
 377                         error = EINVAL;
 378                         goto out;
 379                 }
 380                 if (tp->tn_type == VDIR) {
 381                         if ((fromparent != dir) &&
 382                             (error = tdircheckpath(tp, dir, cred))) {
 383                                 goto out;
 384                         }
 385                 }
 386         }
 387 
 388         /*
 389          * Search for the entry.  Return "found" if it exists.
 390          */
 391         tdp = tmpfs_hash_lookup(name, dir, 1, &found);
 392 
 393         if (tdp) {
 394                 ASSERT(found);
 395                 switch (op) {
 396                 case DE_CREATE:
 397                 case DE_MKDIR:
 398                         if (tpp) {
 399                                 *tpp = found;
 400                                 error = EEXIST;
 401                         } else {
 402                                 tmpnode_rele(found);
 403                         }
 404                         break;
 405 
 406                 case DE_RENAME:
 407                         error = tdirrename(fromparent, tp,
 408                             dir, name, found, tdp, cred);
 409                         if (error == 0) {
 410                                 if (found != NULL) {
 411                                         vnevent_rename_dest(TNTOV(found),
 412                                             TNTOV(dir), name, ctp);
 413                                 }
 414                         }
 415 
 416                         tmpnode_rele(found);
 417                         break;
 418 
 419                 case DE_LINK:
 420                         /*
 421                          * Can't link to an existing file.
 422                          */
 423                         error = EEXIST;
 424                         tmpnode_rele(found);
 425                         break;
 426                 }
 427         } else {
 428 
 429                 /*
 430                  * The entry does not exist. Check write permission in
 431                  * directory to see if entry can be created.
 432                  */
 433                 if (error = tmp_taccess(dir, VWRITE, cred))
 434                         goto out;
 435                 if (op == DE_CREATE || op == DE_MKDIR) {
 436                         /*
 437                          * Make new tmpnode and directory entry as required.
 438                          */
 439                         error = tdirmaketnode(dir, tm, va, op, &tp, cred);
 440                         if (error)
 441                                 goto out;
 442                 }
 443                 if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
 444                         if (op == DE_CREATE || op == DE_MKDIR) {
 445                                 /*
 446                                  * Unmake the inode we just made.
 447                                  */
 448                                 rw_enter(&tp->tn_rwlock, RW_WRITER);
 449                                 if ((tp->tn_type) == VDIR) {
 450                                         ASSERT(tdp == NULL);
 451                                         /*
 452                                          * cleanup allocs made by tdirinit()
 453                                          */
 454                                         tdirtrunc(tp);
 455                                 }
 456                                 mutex_enter(&tp->tn_tlock);
 457                                 tp->tn_nlink = 0;
 458                                 mutex_exit(&tp->tn_tlock);
 459                                 gethrestime(&tp->tn_ctime);
 460                                 rw_exit(&tp->tn_rwlock);
 461                                 tmpnode_rele(tp);
 462                                 tp = NULL;
 463                         }
 464                 } else if (tpp) {
 465                         *tpp = tp;
 466                 } else if (op == DE_CREATE || op == DE_MKDIR) {
 467                         tmpnode_rele(tp);
 468                 }
 469         }
 470 
 471 out:
 472         if (error && (op == DE_LINK || op == DE_RENAME)) {
 473                 /*
 474                  * Undo bumped link count.
 475                  */
 476                 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
 477                 gethrestime(&tp->tn_ctime);
 478         }
 479         return (error);
 480 }
 481 
 482 /*
 483  * Delete entry tp of name "nm" from dir.
 484  * Free dir entry space and decrement link count on tmpnode(s).
 485  *
 486  * Return 0 on success.
 487  */
 488 int
 489 tdirdelete(
 490         struct tmpnode *dir,
 491         struct tmpnode *tp,
 492         char *nm,
 493         enum dr_op op,
 494         struct cred *cred)
 495 {
 496         struct tdirent *tpdp;
 497         int error;
 498         size_t namelen;
 499         struct tmpnode *tnp;
 500         timestruc_t now;
 501 
 502         ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
 503         ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
 504         ASSERT(dir->tn_type == VDIR);
 505 
 506         if (nm[0] == '\0')
 507                 panic("tdirdelete: NULL name for %p", (void *)tp);
 508 
 509         /*
 510          * return error when removing . and ..
 511          */
 512         if (nm[0] == '.') {
 513                 if (nm[1] == '\0')
 514                         return (EINVAL);
 515                 if (nm[1] == '.' && nm[2] == '\0')
 516                         return (EEXIST); /* thus in ufs */
 517         }
 518 
 519         if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
 520                 return (error);
 521 
 522         /*
 523          * If the parent directory is "sticky", then the user must
 524          * own the parent directory or the file in it, or else must
 525          * have permission to write the file.  Otherwise it may not
 526          * be deleted (except by privileged users).
 527          * Same as ufs_dirremove.
 528          */
 529         if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
 530                 return (error);
 531 
 532         if (dir->tn_dir == NULL)
 533                 return (ENOENT);
 534 
 535         tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
 536         if (tpdp == NULL) {
 537                 /*
 538                  * If it is gone, some other thread got here first!
 539                  * Return error ENOENT.
 540                  */
 541                 return (ENOENT);
 542         }
 543 
 544         /*
 545          * If the tmpnode in the tdirent changed, we were probably
 546          * the victim of a concurrent rename operation.  The original
 547          * is gone, so return that status (same as UFS).
 548          */
 549         if (tp != tnp)
 550                 return (ENOENT);
 551 
 552         tmpfs_hash_out(tpdp);
 553 
 554         /*
 555          * Take tpdp out of the directory list.
 556          */
 557         ASSERT(tpdp->td_next != tpdp);
 558         ASSERT(tpdp->td_prev != tpdp);
 559         if (tpdp->td_prev) {
 560                 tpdp->td_prev->td_next = tpdp->td_next;
 561         }
 562         if (tpdp->td_next) {
 563                 tpdp->td_next->td_prev = tpdp->td_prev;
 564         }
 565 
 566         /*
 567          * If the roving slot pointer happens to match tpdp,
 568          * point it at the previous dirent.
 569          */
 570         if (dir->tn_dir->td_prev == tpdp) {
 571                 dir->tn_dir->td_prev = tpdp->td_prev;
 572         }
 573         ASSERT(tpdp->td_next != tpdp);
 574         ASSERT(tpdp->td_prev != tpdp);
 575 
 576         /*
 577          * tpdp points to the correct directory entry
 578          */
 579         namelen = strlen(tpdp->td_name) + 1;
 580 
 581         tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
 582         dir->tn_size -= (sizeof (struct tdirent) + namelen);
 583         dir->tn_dirents--;
 584 
 585         gethrestime(&now);
 586         dir->tn_mtime = now;
 587         dir->tn_ctime = now;
 588         tp->tn_ctime = now;
 589 
 590         ASSERT(tp->tn_nlink > 0);
 591         DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
 592         if (op == DR_RMDIR && tp->tn_type == VDIR) {
 593                 tdirtrunc(tp);
 594                 ASSERT(tp->tn_nlink == 0);
 595         }
 596         return (0);
 597 }
 598 
 599 /*
 600  * tdirinit is used internally to initialize a directory (dir)
 601  * with '.' and '..' entries without checking permissions and locking
 602  */
 603 void
 604 tdirinit(
 605         struct tmpnode *parent,         /* parent of directory to initialize */
 606         struct tmpnode *dir)            /* the new directory */
 607 {
 608         struct tdirent *dot, *dotdot;
 609         timestruc_t now;
 610 
 611         ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
 612         ASSERT(dir->tn_type == VDIR);
 613 
 614         dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
 615         dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
 616 
 617         /*
 618          * Initialize the entries
 619          */
 620         dot->td_tmpnode = dir;
 621         dot->td_offset = 0;
 622         dot->td_name = (char *)dot + sizeof (struct tdirent);
 623         dot->td_name[0] = '.';
 624         dot->td_parent = dir;
 625         tmpfs_hash_in(dot);
 626 
 627         dotdot->td_tmpnode = parent;
 628         dotdot->td_offset = 1;
 629         dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
 630         dotdot->td_name[0] = '.';
 631         dotdot->td_name[1] = '.';
 632         dotdot->td_parent = dir;
 633         tmpfs_hash_in(dotdot);
 634 
 635         /*
 636          * Initialize directory entry list.
 637          */
 638         dot->td_next = dotdot;
 639         dot->td_prev = dotdot;       /* dot's td_prev holds roving slot pointer */
 640         dotdot->td_next = NULL;
 641         dotdot->td_prev = dot;
 642 
 643         gethrestime(&now);
 644         dir->tn_mtime = now;
 645         dir->tn_ctime = now;
 646 
 647         /*
 648          * Link counts are special for the hidden attribute directory.
 649          * The only explicit reference in the name space is "." and
 650          * the reference through ".." is not counted on the parent
 651          * file. The attrdir is created as a side effect to lookup,
 652          * so don't change the ctime of the parent.
 653          * Since tdirinit is called with both dir and parent being the
 654          * same for the root vnode, we need to increment this before we set
 655          * tn_nlink = 2 below.
 656          */
 657         if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
 658                 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
 659                 parent->tn_ctime = now;
 660         }
 661 
 662         dir->tn_dir = dot;
 663         dir->tn_size = 2 * sizeof (struct tdirent) + 5;      /* dot and dotdot */
 664         dir->tn_dirents = 2;
 665         dir->tn_nlink = 2;
 666 }
 667 
 668 
 669 /*
 670  * tdirtrunc is called to remove all directory entries under this directory.
 671  */
 672 void
 673 tdirtrunc(struct tmpnode *dir)
 674 {
 675         struct tdirent *tdp;
 676         struct tmpnode *tp;
 677         size_t namelen;
 678         timestruc_t now;
 679         int isvattrdir, isdotdot, skip_decr;
 680 
 681         ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
 682         ASSERT(dir->tn_type == VDIR);
 683 
 684         isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
 685         for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
 686                 ASSERT(tdp->td_next != tdp);
 687                 ASSERT(tdp->td_prev != tdp);
 688                 ASSERT(tdp->td_tmpnode);
 689 
 690                 dir->tn_dir = tdp->td_next;
 691                 namelen = strlen(tdp->td_name) + 1;
 692 
 693                 /*
 694                  * Adjust the link counts to account for this directory
 695                  * entry removal. Hidden attribute directories may
 696                  * not be empty as they may be truncated as a side-
 697                  * effect of removing the parent. We do hold/rele
 698                  * operations to free up these tmpnodes.
 699                  *
 700                  * Skip the link count adjustment for parents of
 701                  * attribute directories as those link counts
 702                  * do not include the ".." reference in the hidden
 703                  * directories.
 704                  */
 705                 tp = tdp->td_tmpnode;
 706                 isdotdot = (strcmp("..", tdp->td_name) == 0);
 707                 skip_decr = (isvattrdir && isdotdot);
 708                 if (!skip_decr) {
 709                         ASSERT(tp->tn_nlink > 0);
 710                         DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
 711                 }
 712 
 713                 tmpfs_hash_out(tdp);
 714 
 715                 tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
 716                 dir->tn_size -= (sizeof (struct tdirent) + namelen);
 717                 dir->tn_dirents--;
 718         }
 719 
 720         gethrestime(&now);
 721         dir->tn_mtime = now;
 722         dir->tn_ctime = now;
 723 
 724         ASSERT(dir->tn_dir == NULL);
 725         ASSERT(dir->tn_size == 0);
 726         ASSERT(dir->tn_dirents == 0);
 727 }
 728 
 729 /*
 730  * Check if the source directory is in the path of the target directory.
 731  * The target directory is locked by the caller.
 732  *
 733  * XXX - The source and target's should be different upon entry.
 734  */
 735 static int
 736 tdircheckpath(
 737         struct tmpnode *fromtp,
 738         struct tmpnode  *toparent,
 739         struct cred     *cred)
 740 {
 741         int     error = 0;
 742         struct tmpnode *dir, *dotdot;
 743         struct tdirent *tdp;
 744 
 745         ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
 746 
 747         tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
 748         if (tdp == NULL)
 749                 return (ENOENT);
 750 
 751         ASSERT(dotdot);
 752 
 753         if (dotdot == toparent) {
 754                 /* root of fs.  search trivially satisfied. */
 755                 tmpnode_rele(dotdot);
 756                 return (0);
 757         }
 758         for (;;) {
 759                 /*
 760                  * Return error for cases like "mv c c/d",
 761                  * "mv c c/d/e" and so on.
 762                  */
 763                 if (dotdot == fromtp) {
 764                         tmpnode_rele(dotdot);
 765                         error = EINVAL;
 766                         break;
 767                 }
 768                 dir = dotdot;
 769                 error = tdirlookup(dir, "..", &dotdot, cred);
 770                 if (error) {
 771                         tmpnode_rele(dir);
 772                         break;
 773                 }
 774                 /*
 775                  * We're okay if we traverse the directory tree up to
 776                  * the root directory and don't run into the
 777                  * parent directory.
 778                  */
 779                 if (dir == dotdot) {
 780                         tmpnode_rele(dir);
 781                         tmpnode_rele(dotdot);
 782                         break;
 783                 }
 784                 tmpnode_rele(dir);
 785         }
 786         return (error);
 787 }
 788 
 789 static int
 790 tdirrename(
 791         struct tmpnode *fromparent,     /* parent directory of source */
 792         struct tmpnode *fromtp,         /* source tmpnode */
 793         struct tmpnode *toparent,       /* parent directory of target */
 794         char *nm,                       /* entry we are trying to change */
 795         struct tmpnode *to,             /* target tmpnode */
 796         struct tdirent *where,          /* target tmpnode directory entry */
 797         struct cred *cred)              /* credentials */
 798 {
 799         int error = 0;
 800         int doingdirectory;
 801         timestruc_t now;
 802 
 803 #if defined(lint)
 804         nm = nm;
 805 #endif
 806         ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
 807 
 808         /*
 809          * Short circuit rename of something to itself.
 810          */
 811         if (fromtp == to)
 812                 return (ESAME);         /* special KLUDGE error code */
 813 
 814         rw_enter(&fromtp->tn_rwlock, RW_READER);
 815         rw_enter(&to->tn_rwlock, RW_READER);
 816 
 817         /*
 818          * Check that everything is on the same filesystem.
 819          */
 820         if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
 821             to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
 822                 error = EXDEV;
 823                 goto out;
 824         }
 825 
 826         /*
 827          * Must have write permission to rewrite target entry.
 828          * Check for stickyness.
 829          */
 830         if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
 831             (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
 832                 goto out;
 833 
 834         /*
 835          * Ensure source and target are compatible (both directories
 836          * or both not directories).  If target is a directory it must
 837          * be empty and have no links to it; in addition it must not
 838          * be a mount point, and both the source and target must be
 839          * writable.
 840          */
 841         doingdirectory = (fromtp->tn_type == VDIR);
 842         if (to->tn_type == VDIR) {
 843                 if (!doingdirectory) {
 844                         error = EISDIR;
 845                         goto out;
 846                 }
 847                 /*
 848                  * vn_vfswlock will prevent mounts from using the directory
 849                  * until we are done.
 850                  */
 851                 if (vn_vfswlock(TNTOV(to))) {
 852                         error = EBUSY;
 853                         goto out;
 854                 }
 855                 if (vn_mountedvfs(TNTOV(to)) != NULL) {
 856                         vn_vfsunlock(TNTOV(to));
 857                         error = EBUSY;
 858                         goto out;
 859                 }
 860 
 861                 mutex_enter(&to->tn_tlock);
 862                 if (to->tn_dirents > 2 || to->tn_nlink > 2) {
 863                         mutex_exit(&to->tn_tlock);
 864                         vn_vfsunlock(TNTOV(to));
 865                         error = EEXIST; /* SIGH should be ENOTEMPTY */
 866                         /*
 867                          * Update atime because checking tn_dirents is
 868                          * logically equivalent to reading the directory
 869                          */
 870                         gethrestime(&to->tn_atime);
 871                         goto out;
 872                 }
 873                 mutex_exit(&to->tn_tlock);
 874         } else if (doingdirectory) {
 875                 error = ENOTDIR;
 876                 goto out;
 877         }
 878 
 879         tmpfs_hash_change(where, fromtp);
 880         gethrestime(&now);
 881         toparent->tn_mtime = now;
 882         toparent->tn_ctime = now;
 883 
 884         /*
 885          * Upgrade to write lock on "to" (i.e., the target tmpnode).
 886          */
 887         rw_exit(&to->tn_rwlock);
 888         rw_enter(&to->tn_rwlock, RW_WRITER);
 889 
 890         /*
 891          * Decrement the link count of the target tmpnode.
 892          */
 893         DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
 894         to->tn_ctime = now;
 895 
 896         if (doingdirectory) {
 897                 /*
 898                  * The entry for "to" no longer exists so release the vfslock.
 899                  */
 900                 vn_vfsunlock(TNTOV(to));
 901 
 902                 /*
 903                  * Decrement the target link count and delete all entires.
 904                  */
 905                 tdirtrunc(to);
 906                 ASSERT(to->tn_nlink == 0);
 907 
 908                 /*
 909                  * Renaming a directory with the parent different
 910                  * requires that ".." be rewritten.  The window is
 911                  * still there for ".." to be inconsistent, but this
 912                  * is unavoidable, and a lot shorter than when it was
 913                  * done in a user process.
 914                  */
 915                 if (fromparent != toparent)
 916                         tdirfixdotdot(fromtp, fromparent, toparent);
 917         }
 918 out:
 919         rw_exit(&to->tn_rwlock);
 920         rw_exit(&fromtp->tn_rwlock);
 921         return (error);
 922 }
 923 
 924 static void
 925 tdirfixdotdot(
 926         struct tmpnode  *fromtp,        /* child directory */
 927         struct tmpnode  *fromparent,    /* old parent directory */
 928         struct tmpnode  *toparent)      /* new parent directory */
 929 {
 930         struct tdirent  *dotdot;
 931 
 932         ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
 933 
 934         /*
 935          * Increment the link count in the new parent tmpnode
 936          */
 937         INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
 938         gethrestime(&toparent->tn_ctime);
 939 
 940         dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL);
 941 
 942         ASSERT(dotdot->td_tmpnode == fromparent);
 943         dotdot->td_tmpnode = toparent;
 944 
 945         /*
 946          * Decrement the link count of the old parent tmpnode.
 947          * If fromparent is NULL, then this is a new directory link;
 948          * it has no parent, so we need not do anything.
 949          */
 950         if (fromparent != NULL) {
 951                 mutex_enter(&fromparent->tn_tlock);
 952                 if (fromparent->tn_nlink != 0) {
 953                         fromparent->tn_nlink--;
 954                         gethrestime(&fromparent->tn_ctime);
 955                 }
 956                 mutex_exit(&fromparent->tn_tlock);
 957         }
 958 }
 959 
 960 static int
 961 tdiraddentry(
 962         struct tmpnode  *dir,   /* target directory to make entry in */
 963         struct tmpnode  *tp,    /* new tmpnode */
 964         char            *name,
 965         enum de_op      op,
 966         struct tmpnode  *fromtp)
 967 {
 968         struct tdirent *tdp, *tpdp;
 969         size_t          namelen, alloc_size;
 970         timestruc_t     now;
 971 
 972         /*
 973          * Make sure the parent directory wasn't removed from
 974          * underneath the caller.
 975          */
 976         if (dir->tn_dir == NULL)
 977                 return (ENOENT);
 978 
 979         /*
 980          * Check that everything is on the same filesystem.
 981          */
 982         if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
 983                 return (EXDEV);
 984 
 985         /*
 986          * Allocate and initialize directory entry
 987          */
 988         namelen = strlen(name) + 1;
 989         alloc_size = namelen + sizeof (struct tdirent);
 990         tdp = tmp_memalloc(alloc_size, 0);
 991         if (tdp == NULL)
 992                 return (ENOSPC);
 993 
 994         if ((op == DE_RENAME) && (tp->tn_type == VDIR))
 995                 tdirfixdotdot(tp, fromtp, dir);
 996 
 997         dir->tn_size += alloc_size;
 998         dir->tn_dirents++;
 999         tdp->td_tmpnode = tp;
1000         tdp->td_parent = dir;
1001 
1002         /*
1003          * The directory entry and its name were allocated sequentially.
1004          */
1005         tdp->td_name = (char *)tdp + sizeof (struct tdirent);
1006         (void) strcpy(tdp->td_name, name);
1007 
1008         tmpfs_hash_in(tdp);
1009 
1010         /*
1011          * Some utilities expect the size of a directory to remain
1012          * somewhat static.  For example, a routine which unlinks
1013          * files between calls to readdir(); the size of the
1014          * directory changes from underneath it and so the real
1015          * directory offset in bytes is invalid.  To circumvent
1016          * this problem, we initialize a directory entry with an
1017          * phony offset, and use this offset to determine end of
1018          * file in tmp_readdir.
1019          */
1020         tpdp = dir->tn_dir->td_prev;
1021         /*
1022          * Install at first empty "slot" in directory list.
1023          */
1024         while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
1025             tpdp->td_offset) <= 1) {
1026                 ASSERT(tpdp->td_next != tpdp);
1027                 ASSERT(tpdp->td_prev != tpdp);
1028                 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
1029                 tpdp = tpdp->td_next;
1030         }
1031         tdp->td_offset = tpdp->td_offset + 1;
1032 
1033         /*
1034          * If we're at the end of the dirent list and the offset (which
1035          * is necessarily the largest offset in this directory) is more
1036          * than twice the number of dirents, that means the directory is
1037          * 50% holes.  At this point we reset the slot pointer back to
1038          * the beginning of the directory so we start using the holes.
1039          * The idea is that if there are N dirents, there must also be
1040          * N holes, so we can satisfy the next N creates by walking at
1041          * most 2N entries; thus the average cost of a create is constant.
1042          * Note that we use the first dirent's td_prev as the roving
1043          * slot pointer; it's ugly, but it saves a word in every dirent.
1044          */
1045         if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
1046                 dir->tn_dir->td_prev = dir->tn_dir->td_next;
1047         else
1048                 dir->tn_dir->td_prev = tdp;
1049 
1050         ASSERT(tpdp->td_next != tpdp);
1051         ASSERT(tpdp->td_prev != tpdp);
1052 
1053         tdp->td_next = tpdp->td_next;
1054         if (tdp->td_next) {
1055                 tdp->td_next->td_prev = tdp;
1056         }
1057         tdp->td_prev = tpdp;
1058         tpdp->td_next = tdp;
1059 
1060         ASSERT(tdp->td_next != tdp);
1061         ASSERT(tdp->td_prev != tdp);
1062         ASSERT(tpdp->td_next != tpdp);
1063         ASSERT(tpdp->td_prev != tpdp);
1064 
1065         gethrestime(&now);
1066         dir->tn_mtime = now;
1067         dir->tn_ctime = now;
1068 
1069         return (0);
1070 }
1071 
1072 static int
1073 tdirmaketnode(
1074         struct tmpnode *dir,
1075         struct tmount   *tm,
1076         struct vattr    *va,
1077         enum    de_op   op,
1078         struct tmpnode **newnode,
1079         struct cred     *cred)
1080 {
1081         struct tmpnode *tp;
1082         enum vtype      type;
1083 
1084         ASSERT(va != NULL);
1085         ASSERT(op == DE_CREATE || op == DE_MKDIR);
1086         if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
1087             ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
1088                 return (EOVERFLOW);
1089         type = va->va_type;
1090         tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
1091         tmpnode_init(tm, tp, va, cred);
1092 
1093         /* setup normal file/dir's extended attribute directory */
1094         if (dir->tn_flags & ISXATTR) {
1095                 /* parent dir is , mark file as xattr */
1096                 tp->tn_flags |= ISXATTR;
1097         }
1098 
1099 
1100         if (type == VBLK || type == VCHR) {
1101                 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
1102         } else {
1103                 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
1104         }
1105         tp->tn_vnode->v_type = type;
1106         tp->tn_uid = crgetuid(cred);
1107 
1108         /*
1109          * To determine the group-id of the created file:
1110          *   1) If the gid is set in the attribute list (non-Sun & pre-4.0
1111          *      clients are not likely to set the gid), then use it if
1112          *      the process is privileged, belongs to the target group,
1113          *      or the group is the same as the parent directory.
1114          *   2) If the filesystem was not mounted with the Old-BSD-compatible
1115          *      GRPID option, and the directory's set-gid bit is clear,
1116          *      then use the process's gid.
1117          *   3) Otherwise, set the group-id to the gid of the parent directory.
1118          */
1119         if ((va->va_mask & AT_GID) &&
1120             ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
1121             secpolicy_vnode_create_gid(cred) == 0)) {
1122                 /*
1123                  * XXX - is this only the case when a 4.0 NFS client, or a
1124                  * client derived from that code, makes a call over the wire?
1125                  */
1126                 tp->tn_gid = va->va_gid;
1127         } else {
1128                 if (dir->tn_mode & VSGID)
1129                         tp->tn_gid = dir->tn_gid;
1130                 else
1131                         tp->tn_gid = crgetgid(cred);
1132         }
1133         /*
1134          * If we're creating a directory, and the parent directory has the
1135          * set-GID bit set, set it on the new directory.
1136          * Otherwise, if the user is neither privileged nor a member of the
1137          * file's new group, clear the file's set-GID bit.
1138          */
1139         if (dir->tn_mode & VSGID && type == VDIR)
1140                 tp->tn_mode |= VSGID;
1141         else {
1142                 if ((tp->tn_mode & VSGID) &&
1143                     secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
1144                         tp->tn_mode &= ~VSGID;
1145         }
1146 
1147         if (va->va_mask & AT_ATIME)
1148                 tp->tn_atime = va->va_atime;
1149         if (va->va_mask & AT_MTIME)
1150                 tp->tn_mtime = va->va_mtime;
1151 
1152         if (op == DE_MKDIR)
1153                 tdirinit(dir, tp);
1154 
1155         *newnode = tp;
1156         return (0);
1157 }