Print this page
XXXXX tmpfs can be induced to deadlock

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/tmpfs/tmp_dir.c
          +++ new/usr/src/uts/common/fs/tmpfs/tmp_dir.c
↓ open down ↓ 47 lines elided ↑ open up ↑
  48   48  static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
  49   49  static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
  50   50          enum de_op, struct tmpnode **, struct cred *);
  51   51  static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
  52   52          enum de_op, struct tmpnode *);
  53   53  
  54   54  
  55   55  #define T_HASH_SIZE     8192            /* must be power of 2 */
  56   56  #define T_MUTEX_SIZE    64
  57   57  
       58 +/* Non-static so compilers won't constant-fold these away. */
       59 +clock_t tmpfs_rename_backoff_delay = 1;
       60 +unsigned int tmpfs_rename_backoff_tries = 0;
       61 +unsigned long tmpfs_rename_loops = 0;
       62 +
  58   63  static struct tdirent   *t_hashtable[T_HASH_SIZE];
  59   64  static kmutex_t          t_hashmutex[T_MUTEX_SIZE];
  60   65  
  61   66  #define T_HASH_INDEX(a)         ((a) & (T_HASH_SIZE-1))
  62   67  #define T_MUTEX_INDEX(a)        ((a) & (T_MUTEX_SIZE-1))
  63   68  
  64   69  #define TMPFS_HASH(tp, name, hash)                              \
  65   70          {                                                       \
  66   71                  char Xc, *Xcp;                                  \
  67   72                  hash = (uint_t)(uintptr_t)(tp) >> 8;            \
↓ open down ↓ 192 lines elided ↑ open up ↑
 260  265                          return (EACCES);
 261  266  
 262  267          if (name[0] == '\0')
 263  268                  panic("tdirenter: NULL name");
 264  269  
 265  270          /*
 266  271           * For link and rename lock the source entry and check the link count
 267  272           * to see if it has been removed while it was unlocked.
 268  273           */
 269  274          if (op == DE_LINK || op == DE_RENAME) {
 270      -                if (tp != dir)
 271      -                        rw_enter(&tp->tn_rwlock, RW_WRITER);
      275 +                if (tp != dir) {
      276 +                        unsigned int tries = 0;
      277 +
      278 +                        /*
      279 +                         * If we are acquiring tp->tn_rwlock (for SOURCE)
      280 +                         * inside here, we must consider the following:
      281 +                         *
      282 +                         * - dir->tn_rwlock (TARGET) is already HELD (see
      283 +                         * above ASSERT()).
      284 +                         *
      285 +                         * - It is possible our SOURCE is a parent of our
      286 +                         * TARGET.  That is unusual, but that case is caught
      287 +                         * below by tdircheckpath(), which returns an error.
      288 +                         *
      289 +                         * - It is also possible that another thread,
      290 +                         * concurrent to this one, is performing
      291 +                         * rmdir(TARGET), which means it will first acquire
      292 +                         * SOURCE's lock, THEN acquire TARGET's lock, which
      293 +                         * could result in this thread holding TARGET and
      294 +                         * trying for SOURCE, but the other thread holding
      295 +                         * SOURCE and trying for TARGET.  This is deadlock,
      296 +                         * and it's inducible.
      297 +                         *
      298 +                         * To prevent this, we borrow a technique from UFS:
      299 +                         * use rw_tryenter(), delaying whenever it fails.  If
      300 +                         * tmpfs_rename_backoff_tries is tuned to be nonzero,
      301 +                         * return EBUSY once more than that many retries fail.
      302 +                         */
      303 +                        while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
      304 +                                /*
      305 +                                 * Sloppy, but this is a diagnostic so atomic
      306 +                                 * increment would be overkill.
      307 +                                 */
      308 +                                tmpfs_rename_loops++;
      309 +
      310 +                                if (tmpfs_rename_backoff_tries != 0) {
      311 +                                        if (tries > tmpfs_rename_backoff_tries)
      312 +                                                return (EBUSY);
      313 +                                        tries++;
      314 +                                }
      315 +                                /*
      316 +                                 * NOTE: We're still holding dir->tn_rwlock,
      317 +                                 * so drop it over the delay, so any other
      318 +                                 * thread can get its business done.
      319 +                                 *
      320 +                                 * No state change or state inspection happens
      321 +                                 * prior to here, so it is not wholly dangerous
      322 +                                 * to release-and-reacquire dir->tn_rwlock.
      323 +                                 *
      324 +                                 * Hold the vnode of dir in case it gets
      325 +                                 * released by another thread, though.
      326 +                                 */
      327 +                                VN_HOLD(TNTOV(dir));
      328 +                                rw_exit(&dir->tn_rwlock);
      329 +                                delay(tmpfs_rename_backoff_delay);
      330 +                                rw_enter(&dir->tn_rwlock, RW_WRITER);
      331 +                                VN_RELE(TNTOV(dir));
      332 +                        }
      333 +                }
 272  334                  mutex_enter(&tp->tn_tlock);
 273  335                  if (tp->tn_nlink == 0) {
 274  336                          mutex_exit(&tp->tn_tlock);
 275  337                          if (tp != dir)
 276  338                                  rw_exit(&tp->tn_rwlock);
 277  339                          return (ENOENT);
 278  340                  }
 279  341  
 280  342                  if (tp->tn_nlink == MAXLINK) {
 281  343                          mutex_exit(&tp->tn_tlock);
↓ open down ↓ 814 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX