1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/time.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/errno.h>
36 #include <sys/cmn_err.h>
37 #include <sys/cred.h>
38 #include <sys/stat.h>
39 #include <sys/debug.h>
40 #include <sys/policy.h>
41 #include <sys/fs/tmpnode.h>
42 #include <sys/fs/tmp.h>
43 #include <sys/vtrace.h>
44
/*
 * Forward declarations for the file-local directory helpers defined
 * further down in this file.
 */
static int	tdircheckpath(struct tmpnode *fromtp, struct tmpnode *toparent,
    struct cred *cred);
static int	tdirrename(struct tmpnode *fromparent, struct tmpnode *fromtp,
    struct tmpnode *toparent, char *nm, struct tmpnode *to,
    struct tdirent *where, struct cred *cred);
static void	tdirfixdotdot(struct tmpnode *fromtp,
    struct tmpnode *fromparent, struct tmpnode *toparent);
static int	tdirmaketnode(struct tmpnode *dir, struct tmount *tm,
    struct vattr *va, enum de_op op, struct tmpnode **newnode,
    struct cred *cred);
static int	tdiraddentry(struct tmpnode *dir, struct tmpnode *tp,
    char *name, enum de_op op, struct tmpnode *fromtp);
53
54
55 #define T_HASH_SIZE 8192 /* must be power of 2 */
56 #define T_MUTEX_SIZE 64
57
58 /* Non-static so compilers won't constant-fold these away. */
59 clock_t tmpfs_rename_backoff_delay = 1;
60 unsigned int tmpfs_rename_backoff_tries = 0;
61 unsigned long tmpfs_rename_loops = 0;
62
63 static struct tdirent *t_hashtable[T_HASH_SIZE];
64 static kmutex_t t_hashmutex[T_MUTEX_SIZE];
65
66 #define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1))
67 #define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1))
68
/*
 * Compute the hash of entry 'name' in directory 'tp' and store it in the
 * lvalue 'hash'.  The seed is the directory pointer (truncated to uint_t)
 * shifted right by 8; each byte of the name is then folded in as
 * hash = hash * 17 + byte.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the unbraced body of an
 * if/else.  'hash' is evaluated several times and must therefore be a
 * side-effect free lvalue.
 */
#define	TMPFS_HASH(tp, name, hash)					\
	do {								\
		char Xc, *Xcp;						\
		(hash) = (uint_t)(uintptr_t)(tp) >> 8;			\
		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)		\
			(hash) = ((hash) << 4) + (hash) + (uint_t)Xc;	\
	} while (0)
76
77 void
78 tmpfs_hash_init(void)
79 {
80 int ix;
81
82 for (ix = 0; ix < T_MUTEX_SIZE; ix++)
83 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
84 }
85
86 /*
87 * This routine is where the rubber meets the road for identities.
88 */
89 static void
90 tmpfs_hash_in(struct tdirent *t)
91 {
92 uint_t hash;
93 struct tdirent **prevpp;
94 kmutex_t *t_hmtx;
95
96 TMPFS_HASH(t->td_parent, t->td_name, hash);
97 t->td_hash = hash;
98 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
99 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
100 mutex_enter(t_hmtx);
101 t->td_link = *prevpp;
102 *prevpp = t;
103 mutex_exit(t_hmtx);
104 }
105
106 /*
107 * Remove tdirent *t from the hash list.
108 */
109 static void
110 tmpfs_hash_out(struct tdirent *t)
111 {
112 uint_t hash;
113 struct tdirent **prevpp;
114 kmutex_t *t_hmtx;
115
116 hash = t->td_hash;
117 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
118 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
119 mutex_enter(t_hmtx);
120 while (*prevpp != t)
121 prevpp = &(*prevpp)->td_link;
122 *prevpp = t->td_link;
123 mutex_exit(t_hmtx);
124 }
125
126 /*
127 * Currently called by tdirrename() only.
128 * rename operation needs to be done with lock held, to ensure that
129 * no other operations can access the tmpnode at the same instance.
130 */
131 static void
132 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
133 {
134 uint_t hash;
135 kmutex_t *t_hmtx;
136
137 hash = tdp->td_hash;
138 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
139 mutex_enter(t_hmtx);
140 tdp->td_tmpnode = fromtp;
141 mutex_exit(t_hmtx);
142 }
143
144 static struct tdirent *
145 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
146 struct tmpnode **found)
147 {
148 struct tdirent *l;
149 uint_t hash;
150 kmutex_t *t_hmtx;
151 struct tmpnode *tnp;
152
153 TMPFS_HASH(parent, name, hash);
154 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
155 mutex_enter(t_hmtx);
156 l = t_hashtable[T_HASH_INDEX(hash)];
157 while (l) {
158 if ((l->td_hash == hash) &&
159 (l->td_parent == parent) &&
160 (strcmp(l->td_name, name) == 0)) {
161 /*
162 * We need to make sure that the tmpnode that
163 * we put a hold on is the same one that we pass back.
164 * Hence, temporary variable tnp is necessary.
165 */
166 tnp = l->td_tmpnode;
167 if (hold) {
168 ASSERT(tnp);
169 tmpnode_hold(tnp);
170 }
171 if (found)
172 *found = tnp;
173 mutex_exit(t_hmtx);
174 return (l);
175 } else {
176 l = l->td_link;
177 }
178 }
179 mutex_exit(t_hmtx);
180 return (NULL);
181 }
182
183 /*
184 * Search directory 'parent' for entry 'name'.
185 *
186 * The calling thread can't hold the write version
187 * of the rwlock for the directory being searched
188 *
189 * 0 is returned on success and *foundtp points
190 * to the found tmpnode with its vnode held.
191 */
192 int
193 tdirlookup(
194 struct tmpnode *parent,
195 char *name,
196 struct tmpnode **foundtp,
197 struct cred *cred)
198 {
199 int error;
200
201 *foundtp = NULL;
202 if (parent->tn_type != VDIR)
203 return (ENOTDIR);
204
205 if ((error = tmp_taccess(parent, VEXEC, cred)))
206 return (error);
207
208 if (*name == '\0') {
209 tmpnode_hold(parent);
210 *foundtp = parent;
211 return (0);
212 }
213
214 /*
215 * Search the directory for the matching name
216 * We need the lock protecting the tn_dir list
217 * so that it doesn't change out from underneath us.
218 * tmpfs_hash_lookup() will pass back the tmpnode
219 * with a hold on it.
220 */
221
222 if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
223 ASSERT(*foundtp);
224 return (0);
225 }
226
227 return (ENOENT);
228 }
229
230 /*
231 * Enter a directory entry for 'name' and 'tp' into directory 'dir'
232 *
233 * Returns 0 on success.
234 */
235 int
236 tdirenter(
237 struct tmount *tm,
238 struct tmpnode *dir, /* target directory to make entry in */
239 char *name, /* name of entry */
240 enum de_op op, /* entry operation */
241 struct tmpnode *fromparent, /* source directory if rename */
242 struct tmpnode *tp, /* source tmpnode, if link/rename */
243 struct vattr *va,
244 struct tmpnode **tpp, /* return tmpnode, if create/mkdir */
245 struct cred *cred,
246 caller_context_t *ctp)
247 {
248 struct tdirent *tdp;
249 struct tmpnode *found = NULL;
250 int error = 0;
251 char *s;
252
253 /*
254 * tn_rwlock is held to serialize direnter and dirdeletes
255 */
256 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
257 ASSERT(dir->tn_type == VDIR);
258
259 /*
260 * Don't allow '/' characters in pathname component
261 * (thus in ufs_direnter()).
262 */
263 for (s = name; *s; s++)
264 if (*s == '/')
265 return (EACCES);
266
267 if (name[0] == '\0')
268 panic("tdirenter: NULL name");
269
270 /*
271 * For link and rename lock the source entry and check the link count
272 * to see if it has been removed while it was unlocked.
273 */
274 if (op == DE_LINK || op == DE_RENAME) {
275 if (tp != dir) {
276 unsigned int tries = 0;
277
278 /*
279 * If we are acquiring tp->tn_rwlock (for SOURCE)
280 * inside here, we must consider the following:
281 *
282 * - dir->tn_rwlock (TARGET) is already HELD (see
283 * above ASSERT()).
284 *
285 * - It is possible our SOURCE is a parent of our
286 * TARGET. Yes it's unusual, but it will return an
287 * error below via tdircheckpath().
288 *
289 * - It is also possible that another thread,
290 * concurrent to this one, is performing
291 * rmdir(TARGET), which means it will first acquire
292 * SOURCE's lock, THEN acquire TARGET's lock, which
293 * could result in this thread holding TARGET and
294 * trying for SOURCE, but the other thread holding
295 * SOURCE and trying for TARGET. This is deadlock,
296 * and it's inducible.
297 *
298 * To prevent this, we borrow some techniques from UFS
299 * and rw_tryenter(), delaying if we fail, and
300 * if someone tweaks the number of backoff tries to be
301 * nonzero, return EBUSY after that number of tries.
302 */
303 while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
304 /*
305 * Sloppy, but this is a diagnostic so atomic
306 * increment would be overkill.
307 */
308 tmpfs_rename_loops++;
309
310 if (tmpfs_rename_backoff_tries != 0) {
311 if (tries > tmpfs_rename_backoff_tries)
312 return (EBUSY);
313 tries++;
314 }
315 /*
316 * NOTE: We're still holding dir->tn_rwlock,
317 * so drop it over the delay, so any other
318 * thread can get its business done.
319 *
320 * No state change or state inspection happens
321 * prior to here, so it is not wholly dangerous
322 * to release-and-reacquire dir->tn_rwlock.
323 *
324 * Hold the vnode of dir in case it gets
325 * released by another thread, though.
326 */
327 VN_HOLD(TNTOV(dir));
328 rw_exit(&dir->tn_rwlock);
329 delay(tmpfs_rename_backoff_delay);
330 rw_enter(&dir->tn_rwlock, RW_WRITER);
331 VN_RELE(TNTOV(dir));
332 }
333 }
334 mutex_enter(&tp->tn_tlock);
335 if (tp->tn_nlink == 0) {
336 mutex_exit(&tp->tn_tlock);
337 if (tp != dir)
338 rw_exit(&tp->tn_rwlock);
339 return (ENOENT);
340 }
341
342 if (tp->tn_nlink == MAXLINK) {
343 mutex_exit(&tp->tn_tlock);
344 if (tp != dir)
345 rw_exit(&tp->tn_rwlock);
346 return (EMLINK);
347 }
348 tp->tn_nlink++;
349 gethrestime(&tp->tn_ctime);
350 mutex_exit(&tp->tn_tlock);
351 if (tp != dir)
352 rw_exit(&tp->tn_rwlock);
353 }
354
355 /*
356 * This might be a "dangling detached directory".
357 * it could have been removed, but a reference
358 * to it kept in u_cwd. don't bother searching
359 * it, and with any luck the user will get tired
360 * of dealing with us and cd to some absolute
361 * pathway. *sigh*, thus in ufs, too.
362 */
363 if (dir->tn_nlink == 0) {
364 error = ENOENT;
365 goto out;
366 }
367
368 /*
369 * If this is a rename of a directory and the parent is
370 * different (".." must be changed), then the source
371 * directory must not be in the directory hierarchy
372 * above the target, as this would orphan everything
373 * below the source directory.
374 */
375 if (op == DE_RENAME) {
376 if (tp == dir) {
377 error = EINVAL;
378 goto out;
379 }
380 if (tp->tn_type == VDIR) {
381 if ((fromparent != dir) &&
382 (error = tdircheckpath(tp, dir, cred))) {
383 goto out;
384 }
385 }
386 }
387
388 /*
389 * Search for the entry. Return "found" if it exists.
390 */
391 tdp = tmpfs_hash_lookup(name, dir, 1, &found);
392
393 if (tdp) {
394 ASSERT(found);
395 switch (op) {
396 case DE_CREATE:
397 case DE_MKDIR:
398 if (tpp) {
399 *tpp = found;
400 error = EEXIST;
401 } else {
402 tmpnode_rele(found);
403 }
404 break;
405
406 case DE_RENAME:
407 error = tdirrename(fromparent, tp,
408 dir, name, found, tdp, cred);
409 if (error == 0) {
410 if (found != NULL) {
411 vnevent_rename_dest(TNTOV(found),
412 TNTOV(dir), name, ctp);
413 }
414 }
415
416 tmpnode_rele(found);
417 break;
418
419 case DE_LINK:
420 /*
421 * Can't link to an existing file.
422 */
423 error = EEXIST;
424 tmpnode_rele(found);
425 break;
426 }
427 } else {
428
429 /*
430 * The entry does not exist. Check write permission in
431 * directory to see if entry can be created.
432 */
433 if (error = tmp_taccess(dir, VWRITE, cred))
434 goto out;
435 if (op == DE_CREATE || op == DE_MKDIR) {
436 /*
437 * Make new tmpnode and directory entry as required.
438 */
439 error = tdirmaketnode(dir, tm, va, op, &tp, cred);
440 if (error)
441 goto out;
442 }
443 if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
444 if (op == DE_CREATE || op == DE_MKDIR) {
445 /*
446 * Unmake the inode we just made.
447 */
448 rw_enter(&tp->tn_rwlock, RW_WRITER);
449 if ((tp->tn_type) == VDIR) {
450 ASSERT(tdp == NULL);
451 /*
452 * cleanup allocs made by tdirinit()
453 */
454 tdirtrunc(tp);
455 }
456 mutex_enter(&tp->tn_tlock);
457 tp->tn_nlink = 0;
458 mutex_exit(&tp->tn_tlock);
459 gethrestime(&tp->tn_ctime);
460 rw_exit(&tp->tn_rwlock);
461 tmpnode_rele(tp);
462 tp = NULL;
463 }
464 } else if (tpp) {
465 *tpp = tp;
466 } else if (op == DE_CREATE || op == DE_MKDIR) {
467 tmpnode_rele(tp);
468 }
469 }
470
471 out:
472 if (error && (op == DE_LINK || op == DE_RENAME)) {
473 /*
474 * Undo bumped link count.
475 */
476 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
477 gethrestime(&tp->tn_ctime);
478 }
479 return (error);
480 }
481
482 /*
483 * Delete entry tp of name "nm" from dir.
484 * Free dir entry space and decrement link count on tmpnode(s).
485 *
486 * Return 0 on success.
487 */
488 int
489 tdirdelete(
490 struct tmpnode *dir,
491 struct tmpnode *tp,
492 char *nm,
493 enum dr_op op,
494 struct cred *cred)
495 {
496 struct tdirent *tpdp;
497 int error;
498 size_t namelen;
499 struct tmpnode *tnp;
500 timestruc_t now;
501
502 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
503 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
504 ASSERT(dir->tn_type == VDIR);
505
506 if (nm[0] == '\0')
507 panic("tdirdelete: NULL name for %p", (void *)tp);
508
509 /*
510 * return error when removing . and ..
511 */
512 if (nm[0] == '.') {
513 if (nm[1] == '\0')
514 return (EINVAL);
515 if (nm[1] == '.' && nm[2] == '\0')
516 return (EEXIST); /* thus in ufs */
517 }
518
519 if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
520 return (error);
521
522 /*
523 * If the parent directory is "sticky", then the user must
524 * own the parent directory or the file in it, or else must
525 * have permission to write the file. Otherwise it may not
526 * be deleted (except by privileged users).
527 * Same as ufs_dirremove.
528 */
529 if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
530 return (error);
531
532 if (dir->tn_dir == NULL)
533 return (ENOENT);
534
535 tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
536 if (tpdp == NULL) {
537 /*
538 * If it is gone, some other thread got here first!
539 * Return error ENOENT.
540 */
541 return (ENOENT);
542 }
543
544 /*
545 * If the tmpnode in the tdirent changed, we were probably
546 * the victim of a concurrent rename operation. The original
547 * is gone, so return that status (same as UFS).
548 */
549 if (tp != tnp)
550 return (ENOENT);
551
552 tmpfs_hash_out(tpdp);
553
554 /*
555 * Take tpdp out of the directory list.
556 */
557 ASSERT(tpdp->td_next != tpdp);
558 ASSERT(tpdp->td_prev != tpdp);
559 if (tpdp->td_prev) {
560 tpdp->td_prev->td_next = tpdp->td_next;
561 }
562 if (tpdp->td_next) {
563 tpdp->td_next->td_prev = tpdp->td_prev;
564 }
565
566 /*
567 * If the roving slot pointer happens to match tpdp,
568 * point it at the previous dirent.
569 */
570 if (dir->tn_dir->td_prev == tpdp) {
571 dir->tn_dir->td_prev = tpdp->td_prev;
572 }
573 ASSERT(tpdp->td_next != tpdp);
574 ASSERT(tpdp->td_prev != tpdp);
575
576 /*
577 * tpdp points to the correct directory entry
578 */
579 namelen = strlen(tpdp->td_name) + 1;
580
581 tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
582 dir->tn_size -= (sizeof (struct tdirent) + namelen);
583 dir->tn_dirents--;
584
585 gethrestime(&now);
586 dir->tn_mtime = now;
587 dir->tn_ctime = now;
588 tp->tn_ctime = now;
589
590 ASSERT(tp->tn_nlink > 0);
591 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
592 if (op == DR_RMDIR && tp->tn_type == VDIR) {
593 tdirtrunc(tp);
594 ASSERT(tp->tn_nlink == 0);
595 }
596 return (0);
597 }
598
599 /*
600 * tdirinit is used internally to initialize a directory (dir)
601 * with '.' and '..' entries without checking permissions and locking
602 */
603 void
604 tdirinit(
605 struct tmpnode *parent, /* parent of directory to initialize */
606 struct tmpnode *dir) /* the new directory */
607 {
608 struct tdirent *dot, *dotdot;
609 timestruc_t now;
610
611 ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
612 ASSERT(dir->tn_type == VDIR);
613
614 dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
615 dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
616
617 /*
618 * Initialize the entries
619 */
620 dot->td_tmpnode = dir;
621 dot->td_offset = 0;
622 dot->td_name = (char *)dot + sizeof (struct tdirent);
623 dot->td_name[0] = '.';
624 dot->td_parent = dir;
625 tmpfs_hash_in(dot);
626
627 dotdot->td_tmpnode = parent;
628 dotdot->td_offset = 1;
629 dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
630 dotdot->td_name[0] = '.';
631 dotdot->td_name[1] = '.';
632 dotdot->td_parent = dir;
633 tmpfs_hash_in(dotdot);
634
635 /*
636 * Initialize directory entry list.
637 */
638 dot->td_next = dotdot;
639 dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */
640 dotdot->td_next = NULL;
641 dotdot->td_prev = dot;
642
643 gethrestime(&now);
644 dir->tn_mtime = now;
645 dir->tn_ctime = now;
646
647 /*
648 * Link counts are special for the hidden attribute directory.
649 * The only explicit reference in the name space is "." and
650 * the reference through ".." is not counted on the parent
651 * file. The attrdir is created as a side effect to lookup,
652 * so don't change the ctime of the parent.
653 * Since tdirinit is called with both dir and parent being the
654 * same for the root vnode, we need to increment this before we set
655 * tn_nlink = 2 below.
656 */
657 if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
658 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
659 parent->tn_ctime = now;
660 }
661
662 dir->tn_dir = dot;
663 dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */
664 dir->tn_dirents = 2;
665 dir->tn_nlink = 2;
666 }
667
668
669 /*
670 * tdirtrunc is called to remove all directory entries under this directory.
671 */
672 void
673 tdirtrunc(struct tmpnode *dir)
674 {
675 struct tdirent *tdp;
676 struct tmpnode *tp;
677 size_t namelen;
678 timestruc_t now;
679 int isvattrdir, isdotdot, skip_decr;
680
681 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
682 ASSERT(dir->tn_type == VDIR);
683
684 isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
685 for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
686 ASSERT(tdp->td_next != tdp);
687 ASSERT(tdp->td_prev != tdp);
688 ASSERT(tdp->td_tmpnode);
689
690 dir->tn_dir = tdp->td_next;
691 namelen = strlen(tdp->td_name) + 1;
692
693 /*
694 * Adjust the link counts to account for this directory
695 * entry removal. Hidden attribute directories may
696 * not be empty as they may be truncated as a side-
697 * effect of removing the parent. We do hold/rele
698 * operations to free up these tmpnodes.
699 *
700 * Skip the link count adjustment for parents of
701 * attribute directories as those link counts
702 * do not include the ".." reference in the hidden
703 * directories.
704 */
705 tp = tdp->td_tmpnode;
706 isdotdot = (strcmp("..", tdp->td_name) == 0);
707 skip_decr = (isvattrdir && isdotdot);
708 if (!skip_decr) {
709 ASSERT(tp->tn_nlink > 0);
710 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
711 }
712
713 tmpfs_hash_out(tdp);
714
715 tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
716 dir->tn_size -= (sizeof (struct tdirent) + namelen);
717 dir->tn_dirents--;
718 }
719
720 gethrestime(&now);
721 dir->tn_mtime = now;
722 dir->tn_ctime = now;
723
724 ASSERT(dir->tn_dir == NULL);
725 ASSERT(dir->tn_size == 0);
726 ASSERT(dir->tn_dirents == 0);
727 }
728
729 /*
730 * Check if the source directory is in the path of the target directory.
731 * The target directory is locked by the caller.
732 *
733 * XXX - The source and target's should be different upon entry.
734 */
735 static int
736 tdircheckpath(
737 struct tmpnode *fromtp,
738 struct tmpnode *toparent,
739 struct cred *cred)
740 {
741 int error = 0;
742 struct tmpnode *dir, *dotdot;
743 struct tdirent *tdp;
744
745 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
746
747 tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
748 if (tdp == NULL)
749 return (ENOENT);
750
751 ASSERT(dotdot);
752
753 if (dotdot == toparent) {
754 /* root of fs. search trivially satisfied. */
755 tmpnode_rele(dotdot);
756 return (0);
757 }
758 for (;;) {
759 /*
760 * Return error for cases like "mv c c/d",
761 * "mv c c/d/e" and so on.
762 */
763 if (dotdot == fromtp) {
764 tmpnode_rele(dotdot);
765 error = EINVAL;
766 break;
767 }
768 dir = dotdot;
769 error = tdirlookup(dir, "..", &dotdot, cred);
770 if (error) {
771 tmpnode_rele(dir);
772 break;
773 }
774 /*
775 * We're okay if we traverse the directory tree up to
776 * the root directory and don't run into the
777 * parent directory.
778 */
779 if (dir == dotdot) {
780 tmpnode_rele(dir);
781 tmpnode_rele(dotdot);
782 break;
783 }
784 tmpnode_rele(dir);
785 }
786 return (error);
787 }
788
789 static int
790 tdirrename(
791 struct tmpnode *fromparent, /* parent directory of source */
792 struct tmpnode *fromtp, /* source tmpnode */
793 struct tmpnode *toparent, /* parent directory of target */
794 char *nm, /* entry we are trying to change */
795 struct tmpnode *to, /* target tmpnode */
796 struct tdirent *where, /* target tmpnode directory entry */
797 struct cred *cred) /* credentials */
798 {
799 int error = 0;
800 int doingdirectory;
801 timestruc_t now;
802
803 #if defined(lint)
804 nm = nm;
805 #endif
806 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
807
808 /*
809 * Short circuit rename of something to itself.
810 */
811 if (fromtp == to)
812 return (ESAME); /* special KLUDGE error code */
813
814 rw_enter(&fromtp->tn_rwlock, RW_READER);
815 rw_enter(&to->tn_rwlock, RW_READER);
816
817 /*
818 * Check that everything is on the same filesystem.
819 */
820 if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
821 to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
822 error = EXDEV;
823 goto out;
824 }
825
826 /*
827 * Must have write permission to rewrite target entry.
828 * Check for stickyness.
829 */
830 if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
831 (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
832 goto out;
833
834 /*
835 * Ensure source and target are compatible (both directories
836 * or both not directories). If target is a directory it must
837 * be empty and have no links to it; in addition it must not
838 * be a mount point, and both the source and target must be
839 * writable.
840 */
841 doingdirectory = (fromtp->tn_type == VDIR);
842 if (to->tn_type == VDIR) {
843 if (!doingdirectory) {
844 error = EISDIR;
845 goto out;
846 }
847 /*
848 * vn_vfswlock will prevent mounts from using the directory
849 * until we are done.
850 */
851 if (vn_vfswlock(TNTOV(to))) {
852 error = EBUSY;
853 goto out;
854 }
855 if (vn_mountedvfs(TNTOV(to)) != NULL) {
856 vn_vfsunlock(TNTOV(to));
857 error = EBUSY;
858 goto out;
859 }
860
861 mutex_enter(&to->tn_tlock);
862 if (to->tn_dirents > 2 || to->tn_nlink > 2) {
863 mutex_exit(&to->tn_tlock);
864 vn_vfsunlock(TNTOV(to));
865 error = EEXIST; /* SIGH should be ENOTEMPTY */
866 /*
867 * Update atime because checking tn_dirents is
868 * logically equivalent to reading the directory
869 */
870 gethrestime(&to->tn_atime);
871 goto out;
872 }
873 mutex_exit(&to->tn_tlock);
874 } else if (doingdirectory) {
875 error = ENOTDIR;
876 goto out;
877 }
878
879 tmpfs_hash_change(where, fromtp);
880 gethrestime(&now);
881 toparent->tn_mtime = now;
882 toparent->tn_ctime = now;
883
884 /*
885 * Upgrade to write lock on "to" (i.e., the target tmpnode).
886 */
887 rw_exit(&to->tn_rwlock);
888 rw_enter(&to->tn_rwlock, RW_WRITER);
889
890 /*
891 * Decrement the link count of the target tmpnode.
892 */
893 DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
894 to->tn_ctime = now;
895
896 if (doingdirectory) {
897 /*
898 * The entry for "to" no longer exists so release the vfslock.
899 */
900 vn_vfsunlock(TNTOV(to));
901
902 /*
903 * Decrement the target link count and delete all entires.
904 */
905 tdirtrunc(to);
906 ASSERT(to->tn_nlink == 0);
907
908 /*
909 * Renaming a directory with the parent different
910 * requires that ".." be rewritten. The window is
911 * still there for ".." to be inconsistent, but this
912 * is unavoidable, and a lot shorter than when it was
913 * done in a user process.
914 */
915 if (fromparent != toparent)
916 tdirfixdotdot(fromtp, fromparent, toparent);
917 }
918 out:
919 rw_exit(&to->tn_rwlock);
920 rw_exit(&fromtp->tn_rwlock);
921 return (error);
922 }
923
924 static void
925 tdirfixdotdot(
926 struct tmpnode *fromtp, /* child directory */
927 struct tmpnode *fromparent, /* old parent directory */
928 struct tmpnode *toparent) /* new parent directory */
929 {
930 struct tdirent *dotdot;
931
932 ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
933
934 /*
935 * Increment the link count in the new parent tmpnode
936 */
937 INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
938 gethrestime(&toparent->tn_ctime);
939
940 dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL);
941
942 ASSERT(dotdot->td_tmpnode == fromparent);
943 dotdot->td_tmpnode = toparent;
944
945 /*
946 * Decrement the link count of the old parent tmpnode.
947 * If fromparent is NULL, then this is a new directory link;
948 * it has no parent, so we need not do anything.
949 */
950 if (fromparent != NULL) {
951 mutex_enter(&fromparent->tn_tlock);
952 if (fromparent->tn_nlink != 0) {
953 fromparent->tn_nlink--;
954 gethrestime(&fromparent->tn_ctime);
955 }
956 mutex_exit(&fromparent->tn_tlock);
957 }
958 }
959
960 static int
961 tdiraddentry(
962 struct tmpnode *dir, /* target directory to make entry in */
963 struct tmpnode *tp, /* new tmpnode */
964 char *name,
965 enum de_op op,
966 struct tmpnode *fromtp)
967 {
968 struct tdirent *tdp, *tpdp;
969 size_t namelen, alloc_size;
970 timestruc_t now;
971
972 /*
973 * Make sure the parent directory wasn't removed from
974 * underneath the caller.
975 */
976 if (dir->tn_dir == NULL)
977 return (ENOENT);
978
979 /*
980 * Check that everything is on the same filesystem.
981 */
982 if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
983 return (EXDEV);
984
985 /*
986 * Allocate and initialize directory entry
987 */
988 namelen = strlen(name) + 1;
989 alloc_size = namelen + sizeof (struct tdirent);
990 tdp = tmp_memalloc(alloc_size, 0);
991 if (tdp == NULL)
992 return (ENOSPC);
993
994 if ((op == DE_RENAME) && (tp->tn_type == VDIR))
995 tdirfixdotdot(tp, fromtp, dir);
996
997 dir->tn_size += alloc_size;
998 dir->tn_dirents++;
999 tdp->td_tmpnode = tp;
1000 tdp->td_parent = dir;
1001
1002 /*
1003 * The directory entry and its name were allocated sequentially.
1004 */
1005 tdp->td_name = (char *)tdp + sizeof (struct tdirent);
1006 (void) strcpy(tdp->td_name, name);
1007
1008 tmpfs_hash_in(tdp);
1009
1010 /*
1011 * Some utilities expect the size of a directory to remain
1012 * somewhat static. For example, a routine which unlinks
1013 * files between calls to readdir(); the size of the
1014 * directory changes from underneath it and so the real
1015 * directory offset in bytes is invalid. To circumvent
1016 * this problem, we initialize a directory entry with an
1017 * phony offset, and use this offset to determine end of
1018 * file in tmp_readdir.
1019 */
1020 tpdp = dir->tn_dir->td_prev;
1021 /*
1022 * Install at first empty "slot" in directory list.
1023 */
1024 while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
1025 tpdp->td_offset) <= 1) {
1026 ASSERT(tpdp->td_next != tpdp);
1027 ASSERT(tpdp->td_prev != tpdp);
1028 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
1029 tpdp = tpdp->td_next;
1030 }
1031 tdp->td_offset = tpdp->td_offset + 1;
1032
1033 /*
1034 * If we're at the end of the dirent list and the offset (which
1035 * is necessarily the largest offset in this directory) is more
1036 * than twice the number of dirents, that means the directory is
1037 * 50% holes. At this point we reset the slot pointer back to
1038 * the beginning of the directory so we start using the holes.
1039 * The idea is that if there are N dirents, there must also be
1040 * N holes, so we can satisfy the next N creates by walking at
1041 * most 2N entries; thus the average cost of a create is constant.
1042 * Note that we use the first dirent's td_prev as the roving
1043 * slot pointer; it's ugly, but it saves a word in every dirent.
1044 */
1045 if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
1046 dir->tn_dir->td_prev = dir->tn_dir->td_next;
1047 else
1048 dir->tn_dir->td_prev = tdp;
1049
1050 ASSERT(tpdp->td_next != tpdp);
1051 ASSERT(tpdp->td_prev != tpdp);
1052
1053 tdp->td_next = tpdp->td_next;
1054 if (tdp->td_next) {
1055 tdp->td_next->td_prev = tdp;
1056 }
1057 tdp->td_prev = tpdp;
1058 tpdp->td_next = tdp;
1059
1060 ASSERT(tdp->td_next != tdp);
1061 ASSERT(tdp->td_prev != tdp);
1062 ASSERT(tpdp->td_next != tpdp);
1063 ASSERT(tpdp->td_prev != tpdp);
1064
1065 gethrestime(&now);
1066 dir->tn_mtime = now;
1067 dir->tn_ctime = now;
1068
1069 return (0);
1070 }
1071
1072 static int
1073 tdirmaketnode(
1074 struct tmpnode *dir,
1075 struct tmount *tm,
1076 struct vattr *va,
1077 enum de_op op,
1078 struct tmpnode **newnode,
1079 struct cred *cred)
1080 {
1081 struct tmpnode *tp;
1082 enum vtype type;
1083
1084 ASSERT(va != NULL);
1085 ASSERT(op == DE_CREATE || op == DE_MKDIR);
1086 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
1087 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
1088 return (EOVERFLOW);
1089 type = va->va_type;
1090 tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
1091 tmpnode_init(tm, tp, va, cred);
1092
1093 /* setup normal file/dir's extended attribute directory */
1094 if (dir->tn_flags & ISXATTR) {
1095 /* parent dir is , mark file as xattr */
1096 tp->tn_flags |= ISXATTR;
1097 }
1098
1099
1100 if (type == VBLK || type == VCHR) {
1101 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
1102 } else {
1103 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
1104 }
1105 tp->tn_vnode->v_type = type;
1106 tp->tn_uid = crgetuid(cred);
1107
1108 /*
1109 * To determine the group-id of the created file:
1110 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0
1111 * clients are not likely to set the gid), then use it if
1112 * the process is privileged, belongs to the target group,
1113 * or the group is the same as the parent directory.
1114 * 2) If the filesystem was not mounted with the Old-BSD-compatible
1115 * GRPID option, and the directory's set-gid bit is clear,
1116 * then use the process's gid.
1117 * 3) Otherwise, set the group-id to the gid of the parent directory.
1118 */
1119 if ((va->va_mask & AT_GID) &&
1120 ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
1121 secpolicy_vnode_create_gid(cred) == 0)) {
1122 /*
1123 * XXX - is this only the case when a 4.0 NFS client, or a
1124 * client derived from that code, makes a call over the wire?
1125 */
1126 tp->tn_gid = va->va_gid;
1127 } else {
1128 if (dir->tn_mode & VSGID)
1129 tp->tn_gid = dir->tn_gid;
1130 else
1131 tp->tn_gid = crgetgid(cred);
1132 }
1133 /*
1134 * If we're creating a directory, and the parent directory has the
1135 * set-GID bit set, set it on the new directory.
1136 * Otherwise, if the user is neither privileged nor a member of the
1137 * file's new group, clear the file's set-GID bit.
1138 */
1139 if (dir->tn_mode & VSGID && type == VDIR)
1140 tp->tn_mode |= VSGID;
1141 else {
1142 if ((tp->tn_mode & VSGID) &&
1143 secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
1144 tp->tn_mode &= ~VSGID;
1145 }
1146
1147 if (va->va_mask & AT_ATIME)
1148 tp->tn_atime = va->va_atime;
1149 if (va->va_mask & AT_MTIME)
1150 tp->tn_mtime = va->va_mtime;
1151
1152 if (op == DE_MKDIR)
1153 tdirinit(dir, tp);
1154
1155 *newnode = tp;
1156 return (0);
1157 }