Print this page
XXXXX tmpfs can be induced to deadlock
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/tmpfs/tmp_dir.c
+++ new/usr/src/uts/common/fs/tmpfs/tmp_dir.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #pragma ident "%Z%%M% %I% %E% SMI"
27 27
28 28 #include <sys/types.h>
29 29 #include <sys/param.h>
30 30 #include <sys/sysmacros.h>
31 31 #include <sys/systm.h>
32 32 #include <sys/time.h>
33 33 #include <sys/vfs.h>
34 34 #include <sys/vnode.h>
35 35 #include <sys/errno.h>
36 36 #include <sys/cmn_err.h>
37 37 #include <sys/cred.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/policy.h>
41 41 #include <sys/fs/tmpnode.h>
42 42 #include <sys/fs/tmp.h>
43 43 #include <sys/vtrace.h>
44 44
45 45 static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *);
46 46 static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *,
47 47 char *, struct tmpnode *, struct tdirent *, struct cred *);
|
↓ open down ↓ |
47 lines elided |
↑ open up ↑ |
48 48 static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
49 49 static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
50 50 enum de_op, struct tmpnode **, struct cred *);
51 51 static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
52 52 enum de_op, struct tmpnode *);
53 53
54 54
55 55 #define T_HASH_SIZE 8192 /* hash buckets; must be power of 2 (T_HASH_INDEX masks with size-1) */
56 56 #define T_MUTEX_SIZE 64 /* bucket mutexes; T_MUTEX_INDEX masks with size-1, so also a power of 2 */
57 57
58 +/* Rename-backoff tunables for tdirenter(). Non-static so compilers won't constant-fold these away. */
59 +clock_t tmpfs_rename_backoff_delay = 1; /* argument passed to delay() between lock retries */
60 +unsigned int tmpfs_rename_backoff_tries = 0; /* 0 = retry forever; nonzero bounds the retries before EBUSY */
61 +unsigned long tmpfs_rename_loops = 0; /* diagnostic count of failed rw_tryenter() attempts */
62 +
58 63 static struct tdirent *t_hashtable[T_HASH_SIZE]; /* heads of the td_link hash chains */
59 64 static kmutex_t t_hashmutex[T_MUTEX_SIZE]; /* each mutex guards the chains whose hash maps to it */
60 65
61 66 #define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1))
62 67 #define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1))
63 68
64 69 #define TMPFS_HASH(tp, name, hash) /* hash = f(parent pointer tp, NUL-terminated name) */ \
65 70 { \
66 71 char Xc, *Xcp; \
67 72 hash = (uint_t)(uintptr_t)(tp) >> 8; /* seed: pointer truncated to uint_t, then shifted */ \
68 73 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \
69 74 hash = (hash << 4) + hash + (uint_t)Xc; /* hash = hash * 17 + character */ \
70 75 }
71 76
72 77 void
73 78 tmpfs_hash_init(void) /* initialize every mutex in t_hashmutex[] */
74 79 {
75 80 int ix;
76 81
77 82 for (ix = 0; ix < T_MUTEX_SIZE; ix++)
78 83 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
79 84 }
80 85
81 86 /*
82 87 * Insert tdirent *t at the head of its hash chain, keyed by td_parent and td_name.
83 88 */
84 89 static void
85 90 tmpfs_hash_in(struct tdirent *t)
86 91 {
87 92 uint_t hash;
88 93 struct tdirent **prevpp;
89 94 kmutex_t *t_hmtx;
90 95
91 96 TMPFS_HASH(t->td_parent, t->td_name, hash);
92 97 t->td_hash = hash; /* cached; tmpfs_hash_out() and tmpfs_hash_change() reuse it */
93 98 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
94 99 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
95 100 mutex_enter(t_hmtx);
96 101 t->td_link = *prevpp; /* old chain head becomes t's successor */
97 102 *prevpp = t;
98 103 mutex_exit(t_hmtx);
99 104 }
100 105
101 106 /*
102 107 * Remove tdirent *t from its hash chain, located via the cached t->td_hash.
103 108 */
104 109 static void
105 110 tmpfs_hash_out(struct tdirent *t)
106 111 {
107 112 uint_t hash;
108 113 struct tdirent **prevpp;
109 114 kmutex_t *t_hmtx;
110 115
111 116 hash = t->td_hash;
112 117 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
113 118 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
114 119 mutex_enter(t_hmtx);
115 120 while (*prevpp != t) /* t must be on this chain: the walk has no NULL check */
116 121 prevpp = &(*prevpp)->td_link;
117 122 *prevpp = t->td_link; /* unlink t by pointing its predecessor at its successor */
118 123 mutex_exit(t_hmtx);
119 124 }
120 125
121 126 /*
122 127 * Repoint tdirent *tdp at tmpnode fromtp. Currently called by tdirrename() only.
123 128 * The update is done with the hash mutex held, to ensure that no
124 129 * concurrent hash lookup can read td_tmpnode at the same instant.
125 130 */
126 131 static void
127 132 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
128 133 {
129 134 uint_t hash;
130 135 kmutex_t *t_hmtx;
131 136
132 137 hash = tdp->td_hash; /* name and parent are unchanged, so the bucket stays the same */
133 138 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
134 139 mutex_enter(t_hmtx);
135 140 tdp->td_tmpnode = fromtp;
136 141 mutex_exit(t_hmtx);
137 142 }
138 143
139 144 static struct tdirent * /* returns the tdirent for 'name' in 'parent', or NULL if none */
140 145 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold, /* hold != 0: tmpnode_hold() the match */
141 146 struct tmpnode **found) /* if non-NULL, receives the matching entry's tmpnode */
142 147 {
143 148 struct tdirent *l;
144 149 uint_t hash;
145 150 kmutex_t *t_hmtx;
146 151 struct tmpnode *tnp;
147 152
148 153 TMPFS_HASH(parent, name, hash);
149 154 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
150 155 mutex_enter(t_hmtx);
151 156 l = t_hashtable[T_HASH_INDEX(hash)];
152 157 while (l) {
153 158 if ((l->td_hash == hash) && /* cheap hash compare first, strcmp() last */
154 159 (l->td_parent == parent) &&
155 160 (strcmp(l->td_name, name) == 0)) {
156 161 /*
157 162 * We need to make sure that the tmpnode that
158 163 * we put a hold on is the same one that we pass back.
159 164 * Hence the temporary tnp, read once under the hash mutex.
160 165 */
161 166 tnp = l->td_tmpnode;
162 167 if (hold) {
163 168 ASSERT(tnp);
164 169 tmpnode_hold(tnp);
165 170 }
166 171 if (found)
167 172 *found = tnp;
168 173 mutex_exit(t_hmtx);
169 174 return (l);
170 175 } else {
171 176 l = l->td_link;
172 177 }
173 178 }
174 179 mutex_exit(t_hmtx);
175 180 return (NULL); /* *found is left untouched on a miss */
176 181 }
177 182
178 183 /*
179 184 * Search directory 'parent' for entry 'name'.
180 185 *
181 186 * The calling thread can't hold the write version
182 187 * of the rwlock for the directory being searched.
183 188 *
184 189 * 0 is returned on success and *foundtp points
185 190 * to the found tmpnode with a hold on it; otherwise ENOTDIR, an access error, or ENOENT.
186 191 */
187 192 int
188 193 tdirlookup(
189 194 struct tmpnode *parent,
190 195 char *name,
191 196 struct tmpnode **foundtp,
192 197 struct cred *cred)
193 198 {
194 199 int error;
195 200
196 201 *foundtp = NULL;
197 202 if (parent->tn_type != VDIR)
198 203 return (ENOTDIR);
199 204
200 205 if ((error = tmp_taccess(parent, VEXEC, cred))) /* search permission on the directory */
201 206 return (error);
202 207
203 208 if (*name == '\0') { /* an empty name resolves to the directory itself */
204 209 tmpnode_hold(parent);
205 210 *foundtp = parent;
206 211 return (0);
207 212 }
208 213
209 214 /*
210 215 * Search the directory for the matching name.
211 216 * The lookup goes through the name hash, whose bucket
212 217 * mutex keeps the chain from changing underneath us.
213 218 * tmpfs_hash_lookup() will pass back the tmpnode
214 219 * with a hold on it.
215 220 */
216 221
217 222 if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
218 223 ASSERT(*foundtp);
219 224 return (0);
220 225 }
221 226
222 227 return (ENOENT);
223 228 }
224 229
225 230 /*
226 231 * Enter a directory entry for 'name' and 'tp' into directory 'dir'.
227 232 *
228 233 * Returns 0 on success; the caller must hold dir->tn_rwlock as writer.
229 234 */
230 235 int
231 236 tdirenter(
232 237 struct tmount *tm,
233 238 struct tmpnode *dir, /* target directory to make entry in */
234 239 char *name, /* name of entry */
235 240 enum de_op op, /* entry operation */
236 241 struct tmpnode *fromparent, /* source directory if rename */
237 242 struct tmpnode *tp, /* source tmpnode, if link/rename */
238 243 struct vattr *va,
239 244 struct tmpnode **tpp, /* return tmpnode, if create/mkdir */
240 245 struct cred *cred,
241 246 caller_context_t *ctp)
242 247 {
243 248 struct tdirent *tdp;
244 249 struct tmpnode *found = NULL;
245 250 int error = 0;
246 251 char *s;
247 252
248 253 /*
249 254 * tn_rwlock is held to serialize direnter and dirdeletes
250 255 */
251 256 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
252 257 ASSERT(dir->tn_type == VDIR);
253 258
254 259 /*
255 260 * Don't allow '/' characters in pathname component
256 261 * (thus in ufs_direnter()).
257 262 */
258 263 for (s = name; *s; s++)
259 264 if (*s == '/')
|
↓ open down ↓ |
192 lines elided |
↑ open up ↑ |
260 265 return (EACCES);
261 266
262 267 if (name[0] == '\0')
263 268 panic("tdirenter: NULL name");
264 269
265 270 /*
266 271 * For link and rename lock the source entry and check the link count
267 272 * to see if it has been removed while it was unlocked.
268 273 */
269 274 if (op == DE_LINK || op == DE_RENAME) {
270 - if (tp != dir)
271 - rw_enter(&tp->tn_rwlock, RW_WRITER);
275 + if (tp != dir) {
276 + unsigned int tries = 0;
277 +
278 + /*
279 + * If we are acquiring tp->tn_rwlock (for SOURCE)
280 + * inside here, we must consider the following:
281 + *
282 + * - dir->tn_rwlock (TARGET) is already HELD (see
283 + * above ASSERT()).
284 + *
285 + * - It is possible our SOURCE is a parent of our
286 + * TARGET. Yes it's unusual, but it will return an
287 + * error below via tdircheckpath().
288 + *
289 + * - It is also possible that another thread,
290 + * concurrent to this one, is performing
291 + * rmdir(TARGET), which means it will first acquire
292 + * SOURCE's lock, THEN acquire TARGET's lock, which
293 + * could result in this thread holding TARGET and
294 + * trying for SOURCE, but the other thread holding
295 + * SOURCE and trying for TARGET. This is deadlock,
296 + * and it's inducible.
297 + *
298 + * To prevent this, we borrow a technique from UFS:
299 + * use rw_tryenter(), delaying if we fail, and
300 + * if someone tweaks the number of backoff tries to be
301 + * nonzero, return EBUSY after that number of tries.
302 + */
303 + while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
304 + /*
305 + * Sloppy, but this is a diagnostic so atomic
306 + * increment would be overkill.
307 + */
308 + tmpfs_rename_loops++;
309 +
310 + if (tmpfs_rename_backoff_tries != 0) { /* NOTE(review): '>' means EBUSY only on failure N+2 */
311 + if (tries > tmpfs_rename_backoff_tries)
312 + return (EBUSY); /* nothing modified yet, so no undo is needed */
313 + tries++;
314 + }
315 + /*
316 + * NOTE: We're still holding dir->tn_rwlock,
317 + * so drop it over the delay, so any other
318 + * thread can get its business done.
319 + *
320 + * No state change or state inspection happens
321 + * prior to here, so it is not wholly dangerous
322 + * to release-and-reacquire dir->tn_rwlock.
323 + *
324 + * Hold the vnode of dir in case it gets
325 + * released by another thread, though.
326 + */
327 + VN_HOLD(TNTOV(dir));
328 + rw_exit(&dir->tn_rwlock);
329 + delay(tmpfs_rename_backoff_delay);
330 + rw_enter(&dir->tn_rwlock, RW_WRITER);
331 + VN_RELE(TNTOV(dir));
332 + }
333 + }
272 334 mutex_enter(&tp->tn_tlock);
273 335 if (tp->tn_nlink == 0) {
274 336 mutex_exit(&tp->tn_tlock);
275 337 if (tp != dir)
276 338 rw_exit(&tp->tn_rwlock);
277 339 return (ENOENT);
278 340 }
279 341
280 342 if (tp->tn_nlink == MAXLINK) {
281 343 mutex_exit(&tp->tn_tlock);
282 344 if (tp != dir)
283 345 rw_exit(&tp->tn_rwlock);
284 346 return (EMLINK);
285 347 }
286 348 tp->tn_nlink++; /* bumped up front; undone at 'out' if the operation fails */
287 349 gethrestime(&tp->tn_ctime);
288 350 mutex_exit(&tp->tn_tlock);
289 351 if (tp != dir)
290 352 rw_exit(&tp->tn_rwlock);
291 353 }
292 354
293 355 /*
294 356 * This might be a "dangling detached directory".
295 357 * It could have been removed, but a reference
296 358 * to it kept in u_cwd. Don't bother searching
297 359 * it, and with any luck the user will get tired
298 360 * of dealing with us and cd to some absolute
299 361 * pathway. *sigh*, thus in ufs, too.
300 362 */
301 363 if (dir->tn_nlink == 0) {
302 364 error = ENOENT;
303 365 goto out;
304 366 }
305 367
306 368 /*
307 369 * If this is a rename of a directory and the parent is
308 370 * different (".." must be changed), then the source
309 371 * directory must not be in the directory hierarchy
310 372 * above the target, as this would orphan everything
311 373 * below the source directory.
312 374 */
313 375 if (op == DE_RENAME) {
314 376 if (tp == dir) {
315 377 error = EINVAL;
316 378 goto out;
317 379 }
318 380 if (tp->tn_type == VDIR) {
319 381 if ((fromparent != dir) &&
320 382 (error = tdircheckpath(tp, dir, cred))) {
321 383 goto out;
322 384 }
323 385 }
324 386 }
325 387
326 388 /*
327 389 * Search for the entry. On a hit "found" is returned with a hold on it.
328 390 */
329 391 tdp = tmpfs_hash_lookup(name, dir, 1, &found);
330 392
331 393 if (tdp) {
332 394 ASSERT(found);
333 395 switch (op) {
334 396 case DE_CREATE:
335 397 case DE_MKDIR:
336 398 if (tpp) {
337 399 *tpp = found; /* the hold on found passes to the caller */
338 400 error = EEXIST;
339 401 } else {
340 402 tmpnode_rele(found); /* note: error stays 0 on this path */
341 403 }
342 404 break;
343 405
344 406 case DE_RENAME:
345 407 error = tdirrename(fromparent, tp,
346 408 dir, name, found, tdp, cred);
347 409 if (error == 0) {
348 410 if (found != NULL) {
349 411 vnevent_rename_dest(TNTOV(found),
350 412 TNTOV(dir), name, ctp);
351 413 }
352 414 }
353 415
354 416 tmpnode_rele(found);
355 417 break;
356 418
357 419 case DE_LINK:
358 420 /*
359 421 * Can't link to an existing file.
360 422 */
361 423 error = EEXIST;
362 424 tmpnode_rele(found);
363 425 break;
364 426 }
365 427 } else {
366 428
367 429 /*
368 430 * The entry does not exist. Check write permission in
369 431 * directory to see if entry can be created.
370 432 */
371 433 if (error = tmp_taccess(dir, VWRITE, cred))
372 434 goto out;
373 435 if (op == DE_CREATE || op == DE_MKDIR) {
374 436 /*
375 437 * Make new tmpnode and directory entry as required.
376 438 */
377 439 error = tdirmaketnode(dir, tm, va, op, &tp, cred);
378 440 if (error)
379 441 goto out;
380 442 }
381 443 if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
382 444 if (op == DE_CREATE || op == DE_MKDIR) {
383 445 /*
384 446 * Unmake the tmpnode we just made.
385 447 */
386 448 rw_enter(&tp->tn_rwlock, RW_WRITER);
387 449 if ((tp->tn_type) == VDIR) {
388 450 ASSERT(tdp == NULL);
389 451 /*
390 452 * cleanup allocs made by tdirinit()
391 453 */
392 454 tdirtrunc(tp);
393 455 }
394 456 mutex_enter(&tp->tn_tlock);
395 457 tp->tn_nlink = 0;
396 458 mutex_exit(&tp->tn_tlock);
397 459 gethrestime(&tp->tn_ctime);
398 460 rw_exit(&tp->tn_rwlock);
399 461 tmpnode_rele(tp);
400 462 tp = NULL;
401 463 }
402 464 } else if (tpp) {
403 465 *tpp = tp;
404 466 } else if (op == DE_CREATE || op == DE_MKDIR) {
405 467 tmpnode_rele(tp); /* caller did not ask for the new tmpnode back */
406 468 }
407 469 }
408 470
409 471 out:
410 472 if (error && (op == DE_LINK || op == DE_RENAME)) {
411 473 /*
412 474 * Undo the link count bumped at the top of this function.
413 475 */
414 476 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
415 477 gethrestime(&tp->tn_ctime);
416 478 }
417 479 return (error);
418 480 }
419 481
420 482 /*
421 483 * Delete entry tp of name "nm" from dir.
422 484 * Free dir entry space and decrement link count on tmpnode(s).
423 485 *
424 486 * Return 0 on success. The caller holds both dir's and tp's tn_rwlock as writer.
425 487 */
426 488 int
427 489 tdirdelete(
428 490 struct tmpnode *dir,
429 491 struct tmpnode *tp,
430 492 char *nm,
431 493 enum dr_op op,
432 494 struct cred *cred)
433 495 {
434 496 struct tdirent *tpdp;
435 497 int error;
436 498 size_t namelen;
437 499 struct tmpnode *tnp;
438 500 timestruc_t now;
439 501
440 502 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
441 503 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
442 504 ASSERT(dir->tn_type == VDIR);
443 505
444 506 if (nm[0] == '\0')
445 507 panic("tdirdelete: NULL name for %p", (void *)tp);
446 508
447 509 /*
448 510 * Return an error when asked to remove "." or "..".
449 511 */
450 512 if (nm[0] == '.') {
451 513 if (nm[1] == '\0')
452 514 return (EINVAL);
453 515 if (nm[1] == '.' && nm[2] == '\0')
454 516 return (EEXIST); /* thus in ufs */
455 517 }
456 518
457 519 if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
458 520 return (error);
459 521
460 522 /*
461 523 * If the parent directory is "sticky", then the user must
462 524 * own the parent directory or the file in it, or else must
463 525 * have permission to write the file. Otherwise it may not
464 526 * be deleted (except by privileged users).
465 527 * Same as ufs_dirremove.
466 528 */
467 529 if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
468 530 return (error);
469 531
470 532 if (dir->tn_dir == NULL) /* directory has no entry list at all */
471 533 return (ENOENT);
472 534
473 535 tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp); /* hold == 0: no hold is taken on tnp */
474 536 if (tpdp == NULL) {
475 537 /*
476 538 * If it is gone, some other thread got here first!
477 539 * Return error ENOENT.
478 540 */
479 541 return (ENOENT);
480 542 }
481 543
482 544 /*
483 545 * If the tmpnode in the tdirent changed, we were probably
484 546 * the victim of a concurrent rename operation. The original
485 547 * is gone, so return that status (same as UFS).
486 548 */
487 549 if (tp != tnp)
488 550 return (ENOENT);
489 551
490 552 tmpfs_hash_out(tpdp);
491 553
492 554 /*
493 555 * Take tpdp out of the directory list.
494 556 */
495 557 ASSERT(tpdp->td_next != tpdp);
496 558 ASSERT(tpdp->td_prev != tpdp);
497 559 if (tpdp->td_prev) {
498 560 tpdp->td_prev->td_next = tpdp->td_next;
499 561 }
500 562 if (tpdp->td_next) {
501 563 tpdp->td_next->td_prev = tpdp->td_prev;
502 564 }
503 565
504 566 /*
505 567 * If the roving slot pointer (kept in the first dirent's
506 568 * td_prev) happens to match tpdp, point it at the previous dirent.
507 569 */
508 570 if (dir->tn_dir->td_prev == tpdp) {
509 571 dir->tn_dir->td_prev = tpdp->td_prev;
510 572 }
511 573 ASSERT(tpdp->td_next != tpdp);
512 574 ASSERT(tpdp->td_prev != tpdp);
513 575
514 576 /*
515 577 * tpdp points to the correct directory entry
516 578 */
517 579 namelen = strlen(tpdp->td_name) + 1; /* the name is stored right after the tdirent */
518 580
519 581 tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
520 582 dir->tn_size -= (sizeof (struct tdirent) + namelen);
521 583 dir->tn_dirents--;
522 584
523 585 gethrestime(&now);
524 586 dir->tn_mtime = now;
525 587 dir->tn_ctime = now;
526 588 tp->tn_ctime = now;
527 589
528 590 ASSERT(tp->tn_nlink > 0);
529 591 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
530 592 if (op == DR_RMDIR && tp->tn_type == VDIR) {
531 593 tdirtrunc(tp); /* drops the directory's own "." and ".." entries */
532 594 ASSERT(tp->tn_nlink == 0);
533 595 }
534 596 return (0);
535 597 }
536 598
537 599 /*
538 600 * tdirinit is used internally to initialize a directory (dir)
539 601 * with '.' and '..' entries without checking permissions or taking locks.
540 602 */
541 603 void
542 604 tdirinit(
543 605 struct tmpnode *parent, /* parent of directory to initialize */
544 606 struct tmpnode *dir) /* the new directory */
545 607 {
546 608 struct tdirent *dot, *dotdot;
547 609 timestruc_t now;
548 610
549 611 ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
550 612 ASSERT(dir->tn_type == VDIR);
551 613
552 614 dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE); /* +2: "." and its NUL */
553 615 dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE); /* +3: ".." and its NUL */
554 616
555 617 /*
556 618 * Initialize the entries
557 619 */
558 620 dot->td_tmpnode = dir;
559 621 dot->td_offset = 0;
560 622 dot->td_name = (char *)dot + sizeof (struct tdirent);
561 623 dot->td_name[0] = '.'; /* NOTE(review): no NUL is written; presumably tmp_memalloc() zeroes - confirm */
562 624 dot->td_parent = dir;
563 625 tmpfs_hash_in(dot);
564 626
565 627 dotdot->td_tmpnode = parent;
566 628 dotdot->td_offset = 1;
567 629 dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
568 630 dotdot->td_name[0] = '.';
569 631 dotdot->td_name[1] = '.';
570 632 dotdot->td_parent = dir;
571 633 tmpfs_hash_in(dotdot);
572 634
573 635 /*
574 636 * Initialize directory entry list.
575 637 */
576 638 dot->td_next = dotdot;
577 639 dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */
578 640 dotdot->td_next = NULL;
579 641 dotdot->td_prev = dot;
580 642
581 643 gethrestime(&now);
582 644 dir->tn_mtime = now;
583 645 dir->tn_ctime = now;
584 646
585 647 /*
586 648 * Link counts are special for the hidden attribute directory.
587 649 * The only explicit reference in the name space is "." and
588 650 * the reference through ".." is not counted on the parent
589 651 * file. The attrdir is created as a side effect of lookup,
590 652 * so don't change the ctime of the parent.
591 653 * Since tdirinit is called with both dir and parent being the
592 654 * same for the root vnode, we need to increment this before we set
593 655 * tn_nlink = 2 below.
594 656 */
595 657 if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
596 658 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
597 659 parent->tn_ctime = now;
598 660 }
599 661
600 662 dir->tn_dir = dot;
601 663 dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot: 2 + 3 name bytes */
602 664 dir->tn_dirents = 2;
603 665 dir->tn_nlink = 2;
604 666 }
605 667
606 668
607 669 /*
608 670 * tdirtrunc is called to remove all directory entries under this directory.
609 671 */
610 672 void
611 673 tdirtrunc(struct tmpnode *dir)
612 674 {
613 675 struct tdirent *tdp;
614 676 struct tmpnode *tp;
615 677 size_t namelen;
616 678 timestruc_t now;
617 679 int isvattrdir, isdotdot, skip_decr;
618 680
619 681 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
620 682 ASSERT(dir->tn_type == VDIR);
621 683
622 684 isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
623 685 for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) { /* pop the head entry until the list is empty */
624 686 ASSERT(tdp->td_next != tdp);
625 687 ASSERT(tdp->td_prev != tdp);
626 688 ASSERT(tdp->td_tmpnode);
627 689
628 690 dir->tn_dir = tdp->td_next;
629 691 namelen = strlen(tdp->td_name) + 1;
630 692
631 693 /*
632 694 * Adjust the link counts to account for this directory
633 695 * entry removal. Hidden attribute directories may
634 696 * not be empty as they may be truncated as a side-
635 697 * effect of removing the parent. We do hold/rele
636 698 * operations to free up these tmpnodes.
637 699 *
638 700 * Skip the link count adjustment for parents of
639 701 * attribute directories as those link counts
640 702 * do not include the ".." reference in the hidden
641 703 * directories.
642 704 */
643 705 tp = tdp->td_tmpnode;
644 706 isdotdot = (strcmp("..", tdp->td_name) == 0);
645 707 skip_decr = (isvattrdir && isdotdot);
646 708 if (!skip_decr) {
647 709 ASSERT(tp->tn_nlink > 0);
648 710 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
649 711 }
650 712
651 713 tmpfs_hash_out(tdp);
652 714
653 715 tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
654 716 dir->tn_size -= (sizeof (struct tdirent) + namelen);
655 717 dir->tn_dirents--;
656 718 }
657 719
658 720 gethrestime(&now);
659 721 dir->tn_mtime = now;
660 722 dir->tn_ctime = now;
661 723
662 724 ASSERT(dir->tn_dir == NULL);
663 725 ASSERT(dir->tn_size == 0);
664 726 ASSERT(dir->tn_dirents == 0);
665 727 }
666 728
667 729 /*
668 730 * Check if the source directory is in the path of the target directory.
669 731 * The target directory is locked by the caller. Returns 0 if it is not, EINVAL if it is.
670 732 *
671 733 * XXX - The source and target should be different upon entry.
672 734 */
673 735 static int
674 736 tdircheckpath(
675 737 struct tmpnode *fromtp,
676 738 struct tmpnode *toparent,
677 739 struct cred *cred)
678 740 {
679 741 int error = 0;
680 742 struct tmpnode *dir, *dotdot;
681 743 struct tdirent *tdp;
682 744
683 745 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
684 746
685 747 tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot); /* dotdot comes back held */
686 748 if (tdp == NULL)
687 749 return (ENOENT);
688 750
689 751 ASSERT(dotdot);
690 752
691 753 if (dotdot == toparent) {
692 754 /* root of fs. search trivially satisfied. */
693 755 tmpnode_rele(dotdot);
694 756 return (0);
695 757 }
696 758 for (;;) { /* walk ".." upward, holding only the current ancestor */
697 759 /*
698 760 * Return error for cases like "mv c c/d",
699 761 * "mv c c/d/e" and so on.
700 762 */
701 763 if (dotdot == fromtp) {
702 764 tmpnode_rele(dotdot);
703 765 error = EINVAL;
704 766 break;
705 767 }
706 768 dir = dotdot;
707 769 error = tdirlookup(dir, "..", &dotdot, cred);
708 770 if (error) {
709 771 tmpnode_rele(dir);
710 772 break;
711 773 }
712 774 /*
713 775 * We're okay if we traverse the directory tree up to
714 776 * the root directory and don't run into the
715 777 * source directory.
716 778 */
717 779 if (dir == dotdot) {
718 780 tmpnode_rele(dir);
719 781 tmpnode_rele(dotdot);
720 782 break;
721 783 }
722 784 tmpnode_rele(dir);
723 785 }
724 786 return (error);
725 787 }
726 788
727 789 static int /* 0 on success, else an errno value (ESAME if source and target are the same tmpnode) */
728 790 tdirrename(
729 791 struct tmpnode *fromparent, /* parent directory of source */
730 792 struct tmpnode *fromtp, /* source tmpnode */
731 793 struct tmpnode *toparent, /* parent directory of target */
732 794 char *nm, /* entry we are trying to change */
733 795 struct tmpnode *to, /* target tmpnode */
734 796 struct tdirent *where, /* target tmpnode directory entry */
735 797 struct cred *cred) /* credentials */
736 798 {
737 799 int error = 0;
738 800 int doingdirectory;
739 801 timestruc_t now;
740 802
741 803 #if defined(lint)
742 804 nm = nm;
743 805 #endif
744 806 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
745 807
746 808 /*
747 809 * Short circuit rename of something to itself.
748 810 */
749 811 if (fromtp == to)
750 812 return (ESAME); /* special KLUDGE error code */
751 813
752 814 rw_enter(&fromtp->tn_rwlock, RW_READER);
753 815 rw_enter(&to->tn_rwlock, RW_READER);
754 816
755 817 /*
756 818 * Check that everything is on the same filesystem.
757 819 */
758 820 if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
759 821 to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
760 822 error = EXDEV;
761 823 goto out;
762 824 }
763 825
764 826 /*
765 827 * Must have write permission to rewrite target entry.
766 828 * Check for stickiness.
767 829 */
768 830 if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
769 831 (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
770 832 goto out;
771 833
772 834 /*
773 835 * Ensure source and target are compatible (both directories
774 836 * or both not directories). If target is a directory it must
775 837 * be empty and have no links to it; in addition it must not
776 838 * be a mount point, and both the source and target must be
777 839 * writable.
778 840 */
779 841 doingdirectory = (fromtp->tn_type == VDIR);
780 842 if (to->tn_type == VDIR) {
781 843 if (!doingdirectory) {
782 844 error = EISDIR;
783 845 goto out;
784 846 }
785 847 /*
786 848 * vn_vfswlock will prevent mounts from using the directory
787 849 * until we are done.
788 850 */
789 851 if (vn_vfswlock(TNTOV(to))) {
790 852 error = EBUSY;
791 853 goto out;
792 854 }
793 855 if (vn_mountedvfs(TNTOV(to)) != NULL) {
794 856 vn_vfsunlock(TNTOV(to));
795 857 error = EBUSY;
796 858 goto out;
797 859 }
798 860
799 861 mutex_enter(&to->tn_tlock);
800 862 if (to->tn_dirents > 2 || to->tn_nlink > 2) { /* more than "." and "..": not empty */
801 863 mutex_exit(&to->tn_tlock);
802 864 vn_vfsunlock(TNTOV(to));
803 865 error = EEXIST; /* SIGH should be ENOTEMPTY */
804 866 /*
805 867 * Update atime because checking tn_dirents is
806 868 * logically equivalent to reading the directory
807 869 */
808 870 gethrestime(&to->tn_atime);
809 871 goto out;
810 872 }
811 873 mutex_exit(&to->tn_tlock);
812 874 } else if (doingdirectory) {
813 875 error = ENOTDIR;
814 876 goto out;
815 877 }
816 878
817 879 tmpfs_hash_change(where, fromtp); /* the target's dirent now names the source tmpnode */
818 880 gethrestime(&now);
819 881 toparent->tn_mtime = now;
820 882 toparent->tn_ctime = now;
821 883
822 884 /*
823 885 * Upgrade to write lock on "to" (i.e., the target tmpnode) by dropping and re-taking it.
824 886 */
825 887 rw_exit(&to->tn_rwlock);
826 888 rw_enter(&to->tn_rwlock, RW_WRITER);
827 889
828 890 /*
829 891 * Decrement the link count of the target tmpnode.
830 892 */
831 893 DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
832 894 to->tn_ctime = now;
833 895
834 896 if (doingdirectory) {
835 897 /*
836 898 * The entry for "to" no longer exists so release the vfslock.
837 899 */
838 900 vn_vfsunlock(TNTOV(to));
839 901
840 902 /*
841 903 * Decrement the target link count and delete all entries.
842 904 */
843 905 tdirtrunc(to);
844 906 ASSERT(to->tn_nlink == 0);
845 907
846 908 /*
847 909 * Renaming a directory with the parent different
848 910 * requires that ".." be rewritten. The window is
849 911 * still there for ".." to be inconsistent, but this
850 912 * is unavoidable, and a lot shorter than when it was
851 913 * done in a user process.
852 914 */
853 915 if (fromparent != toparent)
854 916 tdirfixdotdot(fromtp, fromparent, toparent);
855 917 }
856 918 out:
857 919 rw_exit(&to->tn_rwlock); /* released in reverse order of acquisition */
858 920 rw_exit(&fromtp->tn_rwlock);
859 921 return (error);
860 922 }
861 923
862 924 static void /* repoint fromtp's ".." at toparent and move the link count between the two parents */
863 925 tdirfixdotdot(
864 926 struct tmpnode *fromtp, /* child directory */
865 927 struct tmpnode *fromparent, /* old parent directory */
866 928 struct tmpnode *toparent) /* new parent directory */
867 929 {
868 930 struct tdirent *dotdot;
869 931
870 932 ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
871 933
872 934 /*
873 935 * Increment the link count in the new parent tmpnode
874 936 */
875 937 INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
876 938 gethrestime(&toparent->tn_ctime);
877 939
878 940 dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL); /* NOTE(review): result is used without a NULL check */
879 941
880 942 ASSERT(dotdot->td_tmpnode == fromparent);
881 943 dotdot->td_tmpnode = toparent; /* written without the hash mutex, unlike tmpfs_hash_change() */
882 944
883 945 /*
884 946 * Decrement the link count of the old parent tmpnode.
885 947 * If fromparent is NULL, then this is a new directory link;
886 948 * it has no parent, so we need not do anything.
887 949 */
888 950 if (fromparent != NULL) {
889 951 mutex_enter(&fromparent->tn_tlock);
890 952 if (fromparent->tn_nlink != 0) { /* never take the count below zero */
891 953 fromparent->tn_nlink--;
892 954 gethrestime(&fromparent->tn_ctime);
893 955 }
894 956 mutex_exit(&fromparent->tn_tlock);
895 957 }
896 958 }
897 959
898 960 static int /* 0 on success; ENOENT, EXDEV or ENOSPC on failure */
899 961 tdiraddentry(
900 962 struct tmpnode *dir, /* target directory to make entry in */
901 963 struct tmpnode *tp, /* new tmpnode */
902 964 char *name,
903 965 enum de_op op,
904 966 struct tmpnode *fromtp) /* passed to tdirfixdotdot() as the old parent directory */
905 967 {
906 968 struct tdirent *tdp, *tpdp;
907 969 size_t namelen, alloc_size;
908 970 timestruc_t now;
909 971
910 972 /*
911 973 * Make sure the parent directory wasn't removed from
912 974 * underneath the caller.
913 975 */
914 976 if (dir->tn_dir == NULL)
915 977 return (ENOENT);
916 978
917 979 /*
918 980 * Check that everything is on the same filesystem.
919 981 */
920 982 if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
921 983 return (EXDEV);
922 984
923 985 /*
924 986 * Allocate and initialize directory entry
925 987 */
926 988 namelen = strlen(name) + 1;
927 989 alloc_size = namelen + sizeof (struct tdirent);
928 990 tdp = tmp_memalloc(alloc_size, 0);
929 991 if (tdp == NULL)
930 992 return (ENOSPC);
931 993
932 994 if ((op == DE_RENAME) && (tp->tn_type == VDIR))
933 995 tdirfixdotdot(tp, fromtp, dir);
934 996
935 997 dir->tn_size += alloc_size;
936 998 dir->tn_dirents++;
937 999 tdp->td_tmpnode = tp;
938 1000 tdp->td_parent = dir;
939 1001
940 1002 /*
941 1003 * The directory entry and its name were allocated sequentially.
942 1004 */
943 1005 tdp->td_name = (char *)tdp + sizeof (struct tdirent);
944 1006 (void) strcpy(tdp->td_name, name); /* fits: alloc_size includes strlen(name) + 1 */
945 1007
946 1008 tmpfs_hash_in(tdp);
947 1009
948 1010 /*
949 1011 * Some utilities expect the size of a directory to remain
950 1012 * somewhat static. For example, a routine which unlinks
951 1013 * files between calls to readdir(); the size of the
952 1014 * directory changes from underneath it and so the real
953 1015 * directory offset in bytes is invalid. To circumvent
954 1016 * this problem, we initialize a directory entry with a
955 1017 * phony offset, and use this offset to determine end of
956 1018 * file in tmp_readdir.
957 1019 */
958 1020 tpdp = dir->tn_dir->td_prev; /* start the search at the roving slot pointer */
959 1021 /*
960 1022 * Install at first empty "slot" in directory list.
961 1023 */
962 1024 while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
963 1025 tpdp->td_offset) <= 1) {
964 1026 ASSERT(tpdp->td_next != tpdp);
965 1027 ASSERT(tpdp->td_prev != tpdp);
966 1028 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
967 1029 tpdp = tpdp->td_next;
968 1030 }
969 1031 tdp->td_offset = tpdp->td_offset + 1;
970 1032
971 1033 /*
972 1034 * If we're at the end of the dirent list and the offset (which
973 1035 * is necessarily the largest offset in this directory) is more
974 1036 * than twice the number of dirents, that means the directory is
975 1037 * 50% holes. At this point we reset the slot pointer back to
976 1038 * the beginning of the directory so we start using the holes.
977 1039 * The idea is that if there are N dirents, there must also be
978 1040 * N holes, so we can satisfy the next N creates by walking at
979 1041 * most 2N entries; thus the average cost of a create is constant.
980 1042 * Note that we use the first dirent's td_prev as the roving
981 1043 * slot pointer; it's ugly, but it saves a word in every dirent.
982 1044 */
983 1045 if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
984 1046 dir->tn_dir->td_prev = dir->tn_dir->td_next;
985 1047 else
986 1048 dir->tn_dir->td_prev = tdp;
987 1049
988 1050 ASSERT(tpdp->td_next != tpdp);
989 1051 ASSERT(tpdp->td_prev != tpdp);
990 1052
991 1053 tdp->td_next = tpdp->td_next; /* splice tdp into the list right after tpdp */
992 1054 if (tdp->td_next) {
993 1055 tdp->td_next->td_prev = tdp;
994 1056 }
995 1057 tdp->td_prev = tpdp;
996 1058 tpdp->td_next = tdp;
997 1059
998 1060 ASSERT(tdp->td_next != tdp);
999 1061 ASSERT(tdp->td_prev != tdp);
1000 1062 ASSERT(tpdp->td_next != tpdp);
1001 1063 ASSERT(tpdp->td_prev != tpdp);
1002 1064
1003 1065 gethrestime(&now);
1004 1066 dir->tn_mtime = now;
1005 1067 dir->tn_ctime = now;
1006 1068
1007 1069 return (0);
1008 1070 }
1009 1071
1010 1072 static int
1011 1073 tdirmaketnode(
1012 1074 struct tmpnode *dir,
1013 1075 struct tmount *tm,
1014 1076 struct vattr *va,
1015 1077 enum de_op op,
1016 1078 struct tmpnode **newnode,
1017 1079 struct cred *cred)
1018 1080 {
1019 1081 struct tmpnode *tp;
1020 1082 enum vtype type;
1021 1083
1022 1084 ASSERT(va != NULL);
1023 1085 ASSERT(op == DE_CREATE || op == DE_MKDIR);
1024 1086 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
1025 1087 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
1026 1088 return (EOVERFLOW);
1027 1089 type = va->va_type;
1028 1090 tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
1029 1091 tmpnode_init(tm, tp, va, cred);
1030 1092
1031 1093 /* setup normal file/dir's extended attribute directory */
1032 1094 if (dir->tn_flags & ISXATTR) {
1033 1095 /* parent dir is , mark file as xattr */
1034 1096 tp->tn_flags |= ISXATTR;
1035 1097 }
1036 1098
1037 1099
1038 1100 if (type == VBLK || type == VCHR) {
1039 1101 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
1040 1102 } else {
1041 1103 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
1042 1104 }
1043 1105 tp->tn_vnode->v_type = type;
1044 1106 tp->tn_uid = crgetuid(cred);
1045 1107
1046 1108 /*
1047 1109 * To determine the group-id of the created file:
1048 1110 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0
1049 1111 * clients are not likely to set the gid), then use it if
1050 1112 * the process is privileged, belongs to the target group,
1051 1113 * or the group is the same as the parent directory.
1052 1114 * 2) If the filesystem was not mounted with the Old-BSD-compatible
1053 1115 * GRPID option, and the directory's set-gid bit is clear,
1054 1116 * then use the process's gid.
1055 1117 * 3) Otherwise, set the group-id to the gid of the parent directory.
1056 1118 */
1057 1119 if ((va->va_mask & AT_GID) &&
1058 1120 ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
1059 1121 secpolicy_vnode_create_gid(cred) == 0)) {
1060 1122 /*
1061 1123 * XXX - is this only the case when a 4.0 NFS client, or a
1062 1124 * client derived from that code, makes a call over the wire?
1063 1125 */
1064 1126 tp->tn_gid = va->va_gid;
1065 1127 } else {
1066 1128 if (dir->tn_mode & VSGID)
1067 1129 tp->tn_gid = dir->tn_gid;
1068 1130 else
1069 1131 tp->tn_gid = crgetgid(cred);
1070 1132 }
1071 1133 /*
1072 1134 * If we're creating a directory, and the parent directory has the
1073 1135 * set-GID bit set, set it on the new directory.
1074 1136 * Otherwise, if the user is neither privileged nor a member of the
1075 1137 * file's new group, clear the file's set-GID bit.
1076 1138 */
1077 1139 if (dir->tn_mode & VSGID && type == VDIR)
1078 1140 tp->tn_mode |= VSGID;
1079 1141 else {
1080 1142 if ((tp->tn_mode & VSGID) &&
1081 1143 secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
1082 1144 tp->tn_mode &= ~VSGID;
1083 1145 }
1084 1146
1085 1147 if (va->va_mask & AT_ATIME)
1086 1148 tp->tn_atime = va->va_atime;
1087 1149 if (va->va_mask & AT_MTIME)
1088 1150 tp->tn_mtime = va->va_mtime;
1089 1151
1090 1152 if (op == DE_MKDIR)
1091 1153 tdirinit(dir, tp);
1092 1154
1093 1155 *newnode = tp;
1094 1156 return (0);
1095 1157 }
|
↓ open down ↓ |
814 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX