Print this page
    
re #13613 rb4516 Tunables needs volatile keyword
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/dnlc.c
          +++ new/usr/src/uts/common/fs/dnlc.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  23   24   * Copyright (c) 2015, Joyent, Inc.
  24   25   * Copyright (c) 2017 by Delphix. All rights reserved.
  25   26   */
  26   27  
  27   28  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  28   29  /*        All Rights Reserved   */
  29   30  
  30   31  /*
  31   32   * University Copyright- Copyright (c) 1982, 1986, 1988
  32   33   * The Regents of the University of California
  33   34   * All Rights Reserved
  34   35   *
  35   36   * University Acknowledgment- Portions of this document are derived from
  36   37   * software developed by the University of California, Berkeley, and its
  37   38   * contributors.
  38   39   */
  39   40  
  40   41  #include <sys/types.h>
  41   42  #include <sys/systm.h>
  42   43  #include <sys/param.h>
  43   44  #include <sys/t_lock.h>
  44   45  #include <sys/systm.h>
  45   46  #include <sys/vfs.h>
  46   47  #include <sys/vnode.h>
  47   48  #include <sys/dnlc.h>
  48   49  #include <sys/kmem.h>
  49   50  #include <sys/cmn_err.h>
  50   51  #include <sys/vtrace.h>
  51   52  #include <sys/bitmap.h>
  52   53  #include <sys/var.h>
  53   54  #include <sys/sysmacros.h>
  54   55  #include <sys/kstat.h>
  55   56  #include <sys/atomic.h>
  56   57  #include <sys/taskq.h>
  57   58  
  58   59  /*
  59   60   * Directory name lookup cache.
  60   61   * Based on code originally done by Robert Elz at Melbourne.
  61   62   *
  62   63   * Names found by directory scans are retained in a cache
  63   64   * for future reference.  Each hash chain is ordered by LRU
  64   65   * Cache is indexed by hash value obtained from (vp, name)
  65   66   * where the vp refers to the directory containing the name.
  66   67   */
  67   68  
  68   69  /*
  69   70   * We want to be able to identify files that are referenced only by the DNLC.
  70   71   * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
  71   72   * since multiple DNLC references should only be counted once in v_count. The
  72   73   * VN_HOLD macro itself is aliased to VN_HOLD_CALLER in this file to help
  73   74   * differentiate the behaviors.  (Unfortunately it is not possible to #undef
  74   75   * VN_HOLD and retain VN_HOLD_CALLER. Ideally a Makefile rule would grep
  75   76   * uncommented C tokens to check that VN_HOLD is referenced only once in this
  76   77   * file, to define VN_HOLD_CALLER.)
  77   78   */
  78   79  #define VN_HOLD_CALLER  VN_HOLD
  79   80  #define VN_HOLD_DNLC(vp)        {       \
  80   81          mutex_enter(&(vp)->v_lock);     \
  81   82          if ((vp)->v_count_dnlc == 0) {  \
  82   83                  VN_HOLD_LOCKED(vp);     \
  83   84          }                               \
  84   85          (vp)->v_count_dnlc++;           \
  85   86          mutex_exit(&(vp)->v_lock);      \
  86   87  }
  87   88  #define VN_RELE_DNLC(vp)        {       \
  88   89          vn_rele_dnlc(vp);               \
  89   90  }
  90   91  
  91   92  /*
  92   93   * Tunable nc_hashavelen is the average length desired for this chain, from
  93   94   * which the size of the nc_hash table is derived at create time.
  94   95   */
  95   96  #define NC_HASHAVELEN_DEFAULT   4
  96   97  int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
  97   98  
  98   99  /*
  99  100   * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
 100  101   * depth exceeds this value, we move the looked-up entry to the front of
 101  102   * its hash chain.  The idea is to make sure that the most frequently
 102  103   * accessed entries are found most quickly (by keeping them near the
 103  104   * front of their hash chains).
 104  105   */
 105  106  #define NC_MOVETOFRONT  2
 106  107  
 107  108  /*
 108  109   *
 109  110   * DNLC_MAX_RELE is used to size an array on the stack when releasing
 110  111   * vnodes. This array is used rather than calling VN_RELE() inline because
 111  112   * all dnlc locks must be dropped by that time in order to avoid a
 112  113   * possible deadlock. This deadlock occurs when the dnlc holds the last
 113  114   * reference to the vnode and so the VOP_INACTIVE vector is called which
 114  115   * can in turn call back into the dnlc. A global array was used but had
 115  116   * many problems:
 116  117   *      1) Actually doesn't have an upper bound on the array size as
 117  118   *         entries can be added after starting the purge.
 118  119   *      2) The locking scheme causes a hang.
 119  120   *      3) Caused serialisation on the global lock.
 120  121   *      4) The array was often unnecessarily huge.
 121  122   *
 122  123   * Note the current value 8 allows up to 4 cache entries (to be purged
 123  124   * from each hash chain), before having to cycle around and retry.
 124  125   * This ought to be ample given that nc_hashavelen is typically very small.
 125  126   */
 126  127  #define DNLC_MAX_RELE   8 /* must be even */
 127  128  
 128  129  /*
 129  130   * Hash table of name cache entries for fast lookup, dynamically
 130  131   * allocated at startup.
 131  132   */
 132  133  nc_hash_t *nc_hash;
 133  134  
 134  135  /*
 135  136   * Rotors. Used to select entries on a round-robin basis.
 136  137   */
  
    | 
      ↓ open down ↓ | 
    104 lines elided | 
    
      ↑ open up ↑ | 
  
 137  138  static nc_hash_t *dnlc_purge_fs1_rotor;
 138  139  static nc_hash_t *dnlc_free_rotor;
 139  140  
 140  141  /*
 141  142   * # of dnlc entries (uninitialized)
 142  143   *
 143  144   * the initial value was chosen as being
 144  145   * a random string of bits, probably not
 145  146   * normally chosen by a systems administrator
 146  147   */
 147      -int ncsize = -1;
      148 +volatile int ncsize = -1;
 148  149  volatile uint32_t dnlc_nentries = 0;    /* current num of name cache entries */
 149  150  static int nc_hashsz;                   /* size of hash table */
 150  151  static int nc_hashmask;                 /* size of hash table minus 1 */
 151  152  
 152  153  /*
 153  154   * The dnlc_reduce_cache() taskq queue is activated when there are
 154  155   * ncsize name cache entries and if no parameter is provided, it reduces
 155  156   * the size down to dnlc_nentries_low_water, which is by default one
  156  157   * hundredth less (or 99%) of ncsize.
 157  158   *
 158  159   * If a parameter is provided to dnlc_reduce_cache(), then we reduce
 159  160   * the size down based on ncsize_onepercent - where ncsize_onepercent
 160  161   * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
 161  162   * the size below 3% of ncsize (ncsize_min_percent).
 162  163   */
 163  164  #define DNLC_LOW_WATER_DIVISOR_DEFAULT 100
 164  165  uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
 165  166  uint_t dnlc_nentries_low_water;
 166  167  int dnlc_reduce_idle = 1; /* no locking needed */
 167  168  uint_t ncsize_onepercent;
 168  169  uint_t ncsize_min_percent;
 169  170  
 170  171  /*
 171  172   * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
 172  173   * then this means the dnlc_reduce_cache() taskq is failing to
 173  174   * keep up. In this case we refuse to add new entries to the dnlc
 174  175   * until the taskq catches up.
 175  176   */
 176  177  uint_t dnlc_max_nentries; /* twice ncsize */
 177  178  uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
 178  179  
 179  180  /*
 180  181   * Tunable to define when we should just remove items from
 181  182   * the end of the chain.
 182  183   */
 183  184  #define DNLC_LONG_CHAIN 8
 184  185  uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
 185  186  
 186  187  /*
 187  188   * ncstats has been deprecated, due to the integer size of the counters
 188  189   * which can easily overflow in the dnlc.
  189  190   * It is maintained (at some expense) for compatibility.
 190  191   * The preferred interface is the kstat accessible nc_stats below.
 191  192   */
 192  193  struct ncstats ncstats;
 193  194  
 194  195  struct nc_stats ncs = {
 195  196          { "hits",                       KSTAT_DATA_UINT64 },
 196  197          { "misses",                     KSTAT_DATA_UINT64 },
 197  198          { "negative_cache_hits",        KSTAT_DATA_UINT64 },
 198  199          { "enters",                     KSTAT_DATA_UINT64 },
 199  200          { "double_enters",              KSTAT_DATA_UINT64 },
 200  201          { "purge_total_entries",        KSTAT_DATA_UINT64 },
 201  202          { "purge_all",                  KSTAT_DATA_UINT64 },
 202  203          { "purge_vp",                   KSTAT_DATA_UINT64 },
 203  204          { "purge_vfs",                  KSTAT_DATA_UINT64 },
 204  205          { "purge_fs1",                  KSTAT_DATA_UINT64 },
 205  206          { "pick_free",                  KSTAT_DATA_UINT64 },
 206  207          { "pick_heuristic",             KSTAT_DATA_UINT64 },
 207  208          { "pick_last",                  KSTAT_DATA_UINT64 },
 208  209  
 209  210          /* directory caching stats */
 210  211  
 211  212          { "dir_hits",                   KSTAT_DATA_UINT64 },
 212  213          { "dir_misses",                 KSTAT_DATA_UINT64 },
 213  214          { "dir_cached_current",         KSTAT_DATA_UINT64 },
 214  215          { "dir_entries_cached_current", KSTAT_DATA_UINT64 },
 215  216          { "dir_cached_total",           KSTAT_DATA_UINT64 },
 216  217          { "dir_start_no_memory",        KSTAT_DATA_UINT64 },
 217  218          { "dir_add_no_memory",          KSTAT_DATA_UINT64 },
 218  219          { "dir_add_abort",              KSTAT_DATA_UINT64 },
 219  220          { "dir_add_max",                KSTAT_DATA_UINT64 },
 220  221          { "dir_remove_entry_fail",      KSTAT_DATA_UINT64 },
 221  222          { "dir_remove_space_fail",      KSTAT_DATA_UINT64 },
 222  223          { "dir_update_fail",            KSTAT_DATA_UINT64 },
 223  224          { "dir_fini_purge",             KSTAT_DATA_UINT64 },
 224  225          { "dir_reclaim_last",           KSTAT_DATA_UINT64 },
 225  226          { "dir_reclaim_any",            KSTAT_DATA_UINT64 },
 226  227  };
 227  228  
 228  229  static int doingcache = 1;
 229  230  
 230  231  vnode_t negative_cache_vnode;
 231  232  
 232  233  /*
 233  234   * Insert entry at the front of the queue
 234  235   */
 235  236  #define nc_inshash(ncp, hp) \
 236  237  { \
 237  238          (ncp)->hash_next = (hp)->hash_next; \
 238  239          (ncp)->hash_prev = (ncache_t *)(hp); \
 239  240          (hp)->hash_next->hash_prev = (ncp); \
 240  241          (hp)->hash_next = (ncp); \
 241  242  }
 242  243  
 243  244  /*
 244  245   * Remove entry from hash queue
 245  246   */
 246  247  #define nc_rmhash(ncp) \
 247  248  { \
 248  249          (ncp)->hash_prev->hash_next = (ncp)->hash_next; \
 249  250          (ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
 250  251          (ncp)->hash_prev = NULL; \
 251  252          (ncp)->hash_next = NULL; \
 252  253  }
 253  254  
 254  255  /*
 255  256   * Free an entry.
 256  257   */
 257  258  #define dnlc_free(ncp) \
 258  259  { \
 259  260          kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
 260  261          atomic_dec_32(&dnlc_nentries); \
 261  262  }
 262  263  
 263  264  
 264  265  /*
 265  266   * Cached directory info.
 266  267   * ======================
 267  268   */
 268  269  
 269  270  /*
 270  271   * Cached directory free space hash function.
 271  272   * Needs the free space handle and the dcp to get the hash table size
 272  273   * Returns the hash index.
 273  274   */
 274  275  #define DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
 275  276  
 276  277  /*
 277  278   * Cached directory name entry hash function.
 278  279   * Uses the name and returns in the input arguments the hash and the name
 279  280   * length.
 280  281   */
 281  282  #define DNLC_DIR_HASH(name, hash, namelen)                      \
 282  283          {                                                       \
 283  284                  char Xc;                                        \
 284  285                  const char *Xcp;                                \
 285  286                  hash = *name;                                   \
 286  287                  for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
 287  288                          hash = (hash << 4) + hash + Xc;         \
 288  289                  ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));    \
 289  290                  namelen = Xcp - (name);                         \
 290  291          }
 291  292  
 292  293  /* special dircache_t pointer to indicate error should be returned */
 293  294  /*
 294  295   * The anchor directory cache pointer can contain 3 types of values,
 295  296   * 1) NULL: No directory cache
  
    | 
      ↓ open down ↓ | 
    138 lines elided | 
    
      ↑ open up ↑ | 
  
 296  297   * 2) DC_RET_LOW_MEM (-1): There was a directory cache that found to be
 297  298   *    too big or a memory shortage occurred. This value remains in the
  298  299   *    pointer until a dnlc_dir_start() which returns a DNOMEM error.
 299  300   *    This is kludgy but efficient and only visible in this source file.
 300  301   * 3) A valid cache pointer.
 301  302   */
 302  303  #define DC_RET_LOW_MEM (dircache_t *)1
 303  304  #define VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
 304  305  
 305  306  /* Tunables */
 306      -uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
 307      -uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
 308      -uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
      307 +volatile uint_t dnlc_dir_enable = 1;    /* disable caching directories by */
      308 +                                        /* setting to 0 */
      309 +volatile uint_t dnlc_dir_min_size = 40; /* min no of directory entries before */
      310 +                                        /* caching */
      311 +volatile uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
 309  312  uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
 310  313  uint_t dnlc_dir_min_reclaim =  350000; /* approx 1MB of dcentrys */
 311  314  /*
 312  315   * dnlc_dir_hash_resize_shift determines when the hash tables
 313  316   * get re-adjusted due to growth or shrinkage
 314  317   * - currently 2 indicating that there can be at most 4
 315  318   * times or at least one quarter the number of entries
 316  319   * before hash table readjustment. Note that with
 317  320   * dnlc_dir_hash_size_shift above set at 3 this would
 318  321   * mean readjustment would occur if the average number
 319  322   * of entries went above 32 or below 2
 320  323   */
 321  324  uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
 322  325  
 323  326  static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
 324  327  static dchead_t dc_head; /* anchor of cached directories */
 325  328  
 326  329  /* Prototypes */
 327  330  static ncache_t *dnlc_get(uchar_t namlen);
 328  331  static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
 329  332      int hash);
 330  333  static void dnlc_dir_reclaim(void *unused);
 331  334  static void dnlc_dir_abort(dircache_t *dcp);
 332  335  static void dnlc_dir_adjust_fhash(dircache_t *dcp);
 333  336  static void dnlc_dir_adjust_nhash(dircache_t *dcp);
 334  337  static void do_dnlc_reduce_cache(void *);
 335  338  
 336  339  
 337  340  /*
 338  341   * Initialize the directory cache.
 339  342   */
 340  343  void
 341  344  dnlc_init()
 342  345  {
 343  346          nc_hash_t *hp;
 344  347          kstat_t *ksp;
 345  348          int i;
 346  349  
 347  350          /*
 348  351           * Set up the size of the dnlc (ncsize) and its low water mark.
 349  352           */
 350  353          if (ncsize == -1) {
 351  354                  /* calculate a reasonable size for the low water */
 352  355                  dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
 353  356                  ncsize = dnlc_nentries_low_water +
 354  357                      (dnlc_nentries_low_water / dnlc_low_water_divisor);
 355  358          } else {
 356  359                  /* don't change the user specified ncsize */
 357  360                  dnlc_nentries_low_water =
 358  361                      ncsize - (ncsize / dnlc_low_water_divisor);
 359  362          }
 360  363          if (ncsize <= 0) {
 361  364                  doingcache = 0;
 362  365                  dnlc_dir_enable = 0; /* also disable directory caching */
 363  366                  ncsize = 0;
 364  367                  cmn_err(CE_NOTE, "name cache (dnlc) disabled");
 365  368                  return;
 366  369          }
 367  370          dnlc_max_nentries = ncsize * 2;
 368  371          ncsize_onepercent = ncsize / 100;
 369  372          ncsize_min_percent = ncsize_onepercent * 3;
 370  373  
 371  374          /*
 372  375           * Initialise the hash table.
 373  376           * Compute hash size rounding to the next power of two.
 374  377           */
 375  378          nc_hashsz = ncsize / nc_hashavelen;
 376  379          nc_hashsz = 1 << highbit(nc_hashsz);
 377  380          nc_hashmask = nc_hashsz - 1;
 378  381          nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
 379  382          for (i = 0; i < nc_hashsz; i++) {
 380  383                  hp = (nc_hash_t *)&nc_hash[i];
 381  384                  mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
 382  385                  hp->hash_next = (ncache_t *)hp;
 383  386                  hp->hash_prev = (ncache_t *)hp;
 384  387          }
 385  388  
 386  389          /*
 387  390           * Initialize rotors
 388  391           */
 389  392          dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
 390  393  
 391  394          /*
 392  395           * Set up the directory caching to use kmem_cache_alloc
 393  396           * for its free space entries so that we can get a callback
 394  397           * when the system is short on memory, to allow us to free
 395  398           * up some memory. we don't use the constructor/deconstructor
 396  399           * functions.
 397  400           */
 398  401          dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
 399  402              sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
 400  403              NULL, 0);
 401  404  
 402  405          /*
 403  406           * Initialise the head of the cached directory structures
 404  407           */
 405  408          mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
 406  409          dc_head.dch_next = (dircache_t *)&dc_head;
 407  410          dc_head.dch_prev = (dircache_t *)&dc_head;
 408  411  
 409  412          /*
 410  413           * Put a hold on the negative cache vnode so that it never goes away
 411  414           * (VOP_INACTIVE isn't called on it).
 412  415           */
 413  416          vn_reinit(&negative_cache_vnode);
 414  417  
 415  418          /*
  416  419   * Initialise kstats - both the old compatibility raw kind and
 417  420           * the more extensive named stats.
 418  421           */
 419  422          ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
 420  423              sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
 421  424          if (ksp) {
 422  425                  ksp->ks_data = (void *) &ncstats;
 423  426                  kstat_install(ksp);
 424  427          }
 425  428          ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
 426  429              sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
 427  430          if (ksp) {
 428  431                  ksp->ks_data = (void *) &ncs;
 429  432                  kstat_install(ksp);
 430  433          }
 431  434  }
 432  435  
 433  436  /*
 434  437   * Add a name to the directory cache.
 435  438   */
 436  439  void
 437  440  dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
 438  441  {
 439  442          ncache_t *ncp;
 440  443          nc_hash_t *hp;
 441  444          uchar_t namlen;
 442  445          int hash;
 443  446  
 444  447          TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
 445  448  
 446  449          if (!doingcache) {
 447  450                  TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
 448  451                      "dnlc_enter_end:(%S) %d", "not caching", 0);
 449  452                  return;
 450  453          }
 451  454  
 452  455          /*
 453  456           * Get a new dnlc entry. Assume the entry won't be in the cache
 454  457           * and initialize it now
 455  458           */
 456  459          DNLCHASH(name, dp, hash, namlen);
 457  460          if ((ncp = dnlc_get(namlen)) == NULL)
 458  461                  return;
 459  462          ncp->dp = dp;
 460  463          VN_HOLD_DNLC(dp);
 461  464          ncp->vp = vp;
 462  465          VN_HOLD_DNLC(vp);
 463  466          bcopy(name, ncp->name, namlen + 1); /* name and null */
 464  467          ncp->hash = hash;
 465  468          hp = &nc_hash[hash & nc_hashmask];
 466  469  
 467  470          mutex_enter(&hp->hash_lock);
 468  471          if (dnlc_search(dp, name, namlen, hash) != NULL) {
 469  472                  mutex_exit(&hp->hash_lock);
 470  473                  ncstats.dbl_enters++;
 471  474                  ncs.ncs_dbl_enters.value.ui64++;
 472  475                  VN_RELE_DNLC(dp);
 473  476                  VN_RELE_DNLC(vp);
 474  477                  dnlc_free(ncp);         /* crfree done here */
 475  478                  TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
 476  479                      "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
 477  480                  return;
 478  481          }
 479  482          /*
 480  483           * Insert back into the hash chain.
 481  484           */
 482  485          nc_inshash(ncp, hp);
 483  486          mutex_exit(&hp->hash_lock);
 484  487          ncstats.enters++;
 485  488          ncs.ncs_enters.value.ui64++;
 486  489          TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
 487  490              "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
 488  491  }
 489  492  
 490  493  /*
 491  494   * Add a name to the directory cache.
 492  495   *
 493  496   * This function is basically identical with
 494  497   * dnlc_enter().  The difference is that when the
 495  498   * desired dnlc entry is found, the vnode in the
 496  499   * ncache is compared with the vnode passed in.
 497  500   *
 498  501   * If they are not equal then the ncache is
 499  502   * updated with the passed in vnode.  Otherwise
 500  503   * it just frees up the newly allocated dnlc entry.
 501  504   */
 502  505  void
 503  506  dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
 504  507  {
 505  508          ncache_t *ncp;
 506  509          ncache_t *tcp;
 507  510          vnode_t *tvp;
 508  511          nc_hash_t *hp;
 509  512          int hash;
 510  513          uchar_t namlen;
 511  514  
 512  515          TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
 513  516  
 514  517          if (!doingcache) {
 515  518                  TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
 516  519                      "dnlc_update_end:(%S) %d", "not caching", 0);
 517  520                  return;
 518  521          }
 519  522  
 520  523          /*
 521  524           * Get a new dnlc entry and initialize it now.
 522  525           * If we fail to get a new entry, call dnlc_remove() to purge
 523  526           * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
 524  527           * entry.
 525  528           * Failure to clear an existing entry could result in false dnlc
 526  529           * lookup (negative/stale entry).
 527  530           */
 528  531          DNLCHASH(name, dp, hash, namlen);
 529  532          if ((ncp = dnlc_get(namlen)) == NULL) {
 530  533                  dnlc_remove(dp, name);
 531  534                  return;
 532  535          }
 533  536          ncp->dp = dp;
 534  537          VN_HOLD_DNLC(dp);
 535  538          ncp->vp = vp;
 536  539          VN_HOLD_DNLC(vp);
 537  540          bcopy(name, ncp->name, namlen + 1); /* name and null */
 538  541          ncp->hash = hash;
 539  542          hp = &nc_hash[hash & nc_hashmask];
 540  543  
 541  544          mutex_enter(&hp->hash_lock);
 542  545          if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
 543  546                  if (tcp->vp != vp) {
 544  547                          tvp = tcp->vp;
 545  548                          tcp->vp = vp;
 546  549                          mutex_exit(&hp->hash_lock);
 547  550                          VN_RELE_DNLC(tvp);
 548  551                          ncstats.enters++;
 549  552                          ncs.ncs_enters.value.ui64++;
 550  553                          TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
 551  554                              "dnlc_update_end:(%S) %d", "done", ncstats.enters);
 552  555                  } else {
 553  556                          mutex_exit(&hp->hash_lock);
 554  557                          VN_RELE_DNLC(vp);
 555  558                          ncstats.dbl_enters++;
 556  559                          ncs.ncs_dbl_enters.value.ui64++;
 557  560                          TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
 558  561                              "dnlc_update_end:(%S) %d",
 559  562                              "dbl enter", ncstats.dbl_enters);
 560  563                  }
 561  564                  VN_RELE_DNLC(dp);
 562  565                  dnlc_free(ncp);         /* crfree done here */
 563  566                  return;
 564  567          }
 565  568          /*
 566  569           * insert the new entry, since it is not in dnlc yet
 567  570           */
 568  571          nc_inshash(ncp, hp);
 569  572          mutex_exit(&hp->hash_lock);
 570  573          ncstats.enters++;
 571  574          ncs.ncs_enters.value.ui64++;
 572  575          TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
 573  576              "dnlc_update_end:(%S) %d", "done", ncstats.enters);
 574  577  }
 575  578  
 576  579  /*
 577  580   * Look up a name in the directory name cache.
 578  581   *
 579  582   * Return a doubly-held vnode if found: one hold so that it may
 580  583   * remain in the cache for other users, the other hold so that
 581  584   * the cache is not re-cycled and the identity of the vnode is
 582  585   * lost before the caller can use the vnode.
 583  586   */
 584  587  vnode_t *
 585  588  dnlc_lookup(vnode_t *dp, const char *name)
 586  589  {
 587  590          ncache_t *ncp;
 588  591          nc_hash_t *hp;
 589  592          vnode_t *vp;
 590  593          int hash, depth;
 591  594          uchar_t namlen;
 592  595  
 593  596          TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
 594  597              "dnlc_lookup_start:dp %x name %s", dp, name);
 595  598  
 596  599          if (!doingcache) {
 597  600                  TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
 598  601                      "dnlc_lookup_end:%S %d vp %x name %s",
 599  602                      "not_caching", 0, NULL, name);
 600  603                  return (NULL);
 601  604          }
 602  605  
 603  606          DNLCHASH(name, dp, hash, namlen);
 604  607          depth = 1;
 605  608          hp = &nc_hash[hash & nc_hashmask];
 606  609          mutex_enter(&hp->hash_lock);
 607  610  
 608  611          for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
 609  612              ncp = ncp->hash_next) {
 610  613                  if (ncp->hash == hash &&        /* fast signature check */
 611  614                      ncp->dp == dp &&
 612  615                      ncp->namlen == namlen &&
 613  616                      bcmp(ncp->name, name, namlen) == 0) {
 614  617                          /*
 615  618                           * Move this entry to the head of its hash chain
 616  619                           * if it's not already close.
 617  620                           */
 618  621                          if (depth > NC_MOVETOFRONT) {
 619  622                                  ncache_t *next = ncp->hash_next;
 620  623                                  ncache_t *prev = ncp->hash_prev;
 621  624  
 622  625                                  prev->hash_next = next;
 623  626                                  next->hash_prev = prev;
 624  627                                  ncp->hash_next = next = hp->hash_next;
 625  628                                  ncp->hash_prev = (ncache_t *)hp;
 626  629                                  next->hash_prev = ncp;
 627  630                                  hp->hash_next = ncp;
 628  631  
 629  632                                  ncstats.move_to_front++;
 630  633                          }
 631  634  
 632  635                          /*
 633  636                           * Put a hold on the vnode now so its identity
 634  637                           * can't change before the caller has a chance to
 635  638                           * put a hold on it.
 636  639                           */
 637  640                          vp = ncp->vp;
 638  641                          VN_HOLD_CALLER(vp);
 639  642                          mutex_exit(&hp->hash_lock);
 640  643                          ncstats.hits++;
 641  644                          ncs.ncs_hits.value.ui64++;
 642  645                          if (vp == DNLC_NO_VNODE) {
 643  646                                  ncs.ncs_neg_hits.value.ui64++;
 644  647                          }
 645  648                          TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
 646  649                              "dnlc_lookup_end:%S %d vp %x name %s", "hit",
 647  650                              ncstats.hits, vp, name);
 648  651                          return (vp);
 649  652                  }
 650  653                  depth++;
 651  654          }
 652  655  
 653  656          mutex_exit(&hp->hash_lock);
 654  657          ncstats.misses++;
 655  658          ncs.ncs_misses.value.ui64++;
 656  659          TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
 657  660              "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
 658  661              NULL, name);
 659  662          return (NULL);
 660  663  }
 661  664  
 662  665  /*
 663  666   * Remove an entry in the directory name cache.
 664  667   */
 665  668  void
 666  669  dnlc_remove(vnode_t *dp, const char *name)
 667  670  {
 668  671          ncache_t *ncp;
 669  672          nc_hash_t *hp;
 670  673          uchar_t namlen;
 671  674          int hash;
 672  675  
 673  676          if (!doingcache)
 674  677                  return;
 675  678          DNLCHASH(name, dp, hash, namlen);
 676  679          hp = &nc_hash[hash & nc_hashmask];
 677  680  
 678  681          mutex_enter(&hp->hash_lock);
 679  682          if (ncp = dnlc_search(dp, name, namlen, hash)) {
 680  683                  /*
 681  684                   * Free up the entry
 682  685                   */
 683  686                  nc_rmhash(ncp);
 684  687                  mutex_exit(&hp->hash_lock);
 685  688                  VN_RELE_DNLC(ncp->vp);
 686  689                  VN_RELE_DNLC(ncp->dp);
 687  690                  dnlc_free(ncp);
 688  691                  return;
 689  692          }
 690  693          mutex_exit(&hp->hash_lock);
 691  694  }
 692  695  
 693  696  /*
 694  697   * Purge the entire cache.
 695  698   */
void
dnlc_purge()
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int index;
	int i;
	vnode_t *nc_rele[DNLC_MAX_RELE];	/* holds to drop after unlock */

	if (!doingcache)
		return;

	ncstats.purges++;
	ncs.ncs_purge_all.value.ui64++;

	/* Walk every hash chain, freeing all entries. */
	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			/*
			 * Defer the VN_RELE_DNLC calls until the chain
			 * lock is dropped (see the release loop below).
			 * NOTE(review): the index += 2 pattern assumes
			 * DNLC_MAX_RELE is even — confirm in dnlc.h.
			 */
			nc_rele[index++] = ncp->vp;
			nc_rele[index++] = ncp->dp;

			nc_rmhash(ncp);
			dnlc_free(ncp);
			ncp = np;
			ncs.ncs_purge_total.value.ui64++;
			/* Release batch is full; flush it lock-free. */
			if (index == DNLC_MAX_RELE)
				break;
		}
		mutex_exit(&nch->hash_lock);

		/* Release holds on all the vnodes now that we have no locks */
		for (i = 0; i < index; i++) {
			VN_RELE_DNLC(nc_rele[i]);
		}
		/*
		 * If we stopped early because the batch filled up, the
		 * chain is not empty yet; redo it on the next pass.
		 */
		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
}
 740  743  
 741  744  /*
 742  745   * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
 743  746   * reference count goes to zero (the caller still holds a reference).
 744  747   */
void
dnlc_purge_vp(vnode_t *vp)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int index;
	vnode_t *nc_rele[DNLC_MAX_RELE];	/* holds to drop after unlock */

	ASSERT(vp->v_count > 0);
	/* Fast exit: the dnlc holds no references on this vnode. */
	if (vp->v_count_dnlc == 0) {
		return;
	}

	if (!doingcache)
		return;

	ncstats.purges++;
	ncs.ncs_purge_vp.value.ui64++;

	/* Scan each hash chain for entries whose dp or vp matches. */
	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			if (ncp->dp == vp || ncp->vp == vp) {
				/*
				 * Defer the releases until the chain
				 * lock is dropped.
				 */
				nc_rele[index++] = ncp->vp;
				nc_rele[index++] = ncp->dp;
				nc_rmhash(ncp);
				dnlc_free(ncp);
				ncs.ncs_purge_total.value.ui64++;
				/* Batch full: flush, then redo chain. */
				if (index == DNLC_MAX_RELE) {
					ncp = np;
					break;
				}
			}
			ncp = np;
		}
		mutex_exit(&nch->hash_lock);

		/* Release holds on all the vnodes now that we have no locks */
		while (index) {
			VN_RELE_DNLC(nc_rele[--index]);
		}

		/*
		 * All dnlc references dropped; done (the caller still
		 * holds its own reference per the block comment above).
		 */
		if (vp->v_count_dnlc == 0) {
			return;
		}

		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
}
 801  804  
 802  805  /*
 803  806   * Purge cache entries referencing a vfsp.  Caller supplies a count
 804  807   * of entries to purge; up to that many will be freed.  A count of
 805  808   * zero indicates that all such entries should be purged.  Returns
 806  809   * the number of entries that were purged.
 807  810   */
int
dnlc_purge_vfsp(vfs_t *vfsp, int count)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int n = 0;		/* number of entries purged so far */
	int index;
	int i;
	vnode_t *nc_rele[DNLC_MAX_RELE];	/* holds to drop after unlock */

	if (!doingcache)
		return (0);

	ncstats.purges++;
	ncs.ncs_purge_vfs.value.ui64++;

	/* Scan each hash chain for entries belonging to vfsp. */
	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			ASSERT(ncp->dp != NULL);
			ASSERT(ncp->vp != NULL);
			/* Match on either the directory's or entry's vfs. */
			if ((ncp->dp->v_vfsp == vfsp) ||
			    (ncp->vp->v_vfsp == vfsp)) {
				n++;
				/*
				 * Defer the releases until the chain
				 * lock is dropped.
				 */
				nc_rele[index++] = ncp->vp;
				nc_rele[index++] = ncp->dp;
				nc_rmhash(ncp);
				dnlc_free(ncp);
				ncs.ncs_purge_total.value.ui64++;
				/* Batch full: flush, then redo chain. */
				if (index == DNLC_MAX_RELE) {
					ncp = np;
					break;
				}
				/* Requested count reached; stop scanning. */
				if (count != 0 && n >= count) {
					break;
				}
			}
			ncp = np;
		}
		mutex_exit(&nch->hash_lock);
		/* Release holds on all the vnodes now that we have no locks */
		for (i = 0; i < index; i++) {
			VN_RELE_DNLC(nc_rele[i]);
		}
		if (count != 0 && n >= count) {
			return (n);
		}
		/*
		 * Stopped early due to a full release batch: redo the
		 * current chain.
		 */
		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
	return (n);
}
 866  869  
 867  870  /*
 868  871   * Purge 1 entry from the dnlc that is part of the filesystem(s)
 869  872   * represented by 'vop'. The purpose of this routine is to allow
 870  873   * users of the dnlc to free a vnode that is being held by the dnlc.
 871  874   *
 * If we find a vnode that we release which will result in
 * freeing the underlying vnode (count was 1), return 1; return 0
 * if no appropriate vnodes were found.
 875  878   *
 876  879   * Note, vop is not the 'right' identifier for a filesystem.
 877  880   */
 878  881  int
 879  882  dnlc_fs_purge1(vnodeops_t *vop)
 880  883  {
 881  884          nc_hash_t *end;
 882  885          nc_hash_t *hp;
 883  886          ncache_t *ncp;
 884  887          vnode_t *vp;
 885  888  
 886  889          if (!doingcache)
 887  890                  return (0);
 888  891  
 889  892          ncs.ncs_purge_fs1.value.ui64++;
 890  893  
 891  894          /*
 892  895           * Scan the dnlc entries looking for a likely candidate.
 893  896           */
 894  897          hp = end = dnlc_purge_fs1_rotor;
 895  898  
 896  899          do {
 897  900                  if (++hp == &nc_hash[nc_hashsz])
 898  901                          hp = nc_hash;
 899  902                  dnlc_purge_fs1_rotor = hp;
 900  903                  if (hp->hash_next == (ncache_t *)hp)
 901  904                          continue;
 902  905                  mutex_enter(&hp->hash_lock);
 903  906                  for (ncp = hp->hash_prev;
 904  907                      ncp != (ncache_t *)hp;
 905  908                      ncp = ncp->hash_prev) {
 906  909                          vp = ncp->vp;
 907  910                          if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
 908  911                              vn_matchops(vp, vop))
 909  912                                  break;
 910  913                  }
 911  914                  if (ncp != (ncache_t *)hp) {
 912  915                          nc_rmhash(ncp);
 913  916                          mutex_exit(&hp->hash_lock);
 914  917                          VN_RELE_DNLC(ncp->dp);
 915  918                          VN_RELE_DNLC(vp)
 916  919                          dnlc_free(ncp);
 917  920                          ncs.ncs_purge_total.value.ui64++;
 918  921                          return (1);
 919  922                  }
 920  923                  mutex_exit(&hp->hash_lock);
 921  924          } while (hp != end);
 922  925          return (0);
 923  926  }
 924  927  
 925  928  /*
 926  929   * Utility routine to search for a cache entry. Return the
 927  930   * ncache entry if found, NULL otherwise.
 928  931   */
 929  932  static ncache_t *
 930  933  dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
 931  934  {
 932  935          nc_hash_t *hp;
 933  936          ncache_t *ncp;
 934  937  
 935  938          hp = &nc_hash[hash & nc_hashmask];
 936  939  
 937  940          for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
 938  941                  if (ncp->hash == hash &&
 939  942                      ncp->dp == dp &&
 940  943                      ncp->namlen == namlen &&
 941  944                      bcmp(ncp->name, name, namlen) == 0)
 942  945                          return (ncp);
 943  946          }
 944  947          return (NULL);
 945  948  }
 946  949  
 947  950  #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
 948  951  #error ncache_t name length representation is too small
 949  952  #endif
 950  953  
 951  954  void
 952  955  dnlc_reduce_cache(void *reduce_percent)
 953  956  {
 954  957          if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
 955  958                  dnlc_reduce_idle = 0;
 956  959                  if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
 957  960                      reduce_percent, TQ_NOSLEEP)) == NULL)
 958  961                          dnlc_reduce_idle = 1;
 959  962          }
 960  963  }
 961  964  
 962  965  /*
 963  966   * Get a new name cache entry.
 964  967   * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
 965  968   * is short then just return NULL. If we're over ncsize then kick off a
 966  969   * thread to free some in use entries down to dnlc_nentries_low_water.
 967  970   * Caller must initialise all fields except namlen.
 968  971   * Component names are defined to be less than MAXNAMELEN
 969  972   * which includes a null.
 970  973   */
 971  974  static ncache_t *
 972  975  dnlc_get(uchar_t namlen)
 973  976  {
 974  977          ncache_t *ncp;
 975  978  
 976  979          if (dnlc_nentries > dnlc_max_nentries) {
 977  980                  dnlc_max_nentries_cnt++; /* keep a statistic */
 978  981                  return (NULL);
 979  982          }
 980  983          ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
 981  984          if (ncp == NULL) {
 982  985                  return (NULL);
 983  986          }
 984  987          ncp->namlen = namlen;
 985  988          atomic_inc_32(&dnlc_nentries);
 986  989          dnlc_reduce_cache(NULL);
 987  990          return (ncp);
 988  991  }
 989  992  
 990  993  /*
 991  994   * Taskq routine to free up name cache entries to reduce the
 992  995   * cache size to the low water mark if "reduce_percent" is not provided.
 993  996   * If "reduce_percent" is provided, reduce cache size by
 994  997   * (ncsize_onepercent * reduce_percent).
 995  998   */
/*ARGSUSED*/
static void
do_dnlc_reduce_cache(void *reduce_percent)
{
	nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
	vnode_t *vp;
	ncache_t *ncp;
	int cnt;
	uint_t low_water = dnlc_nentries_low_water;	/* reduction target */

	/* reduce_percent is an integer smuggled through the void * arg */
	if (reduce_percent) {
		uint_t reduce_cnt;

		/*
		 * Never try to reduce the current number
		 * of cache entries below 3% of ncsize.
		 */
		if (dnlc_nentries <= ncsize_min_percent) {
			dnlc_reduce_idle = 1;
			return;
		}
		reduce_cnt = ncsize_onepercent *
		    (uint_t)(uintptr_t)reduce_percent;

		/* Clamp the target so it never drops below the 3% floor. */
		if (reduce_cnt > dnlc_nentries ||
		    dnlc_nentries - reduce_cnt < ncsize_min_percent)
			low_water = ncsize_min_percent;
		else
			low_water = dnlc_nentries - reduce_cnt;
	}

	do {
		/*
		 * Find the first non empty hash queue without locking.
		 * Only look at each hash queue once to avoid an infinite loop.
		 */
		do {
			if (++hp == &nc_hash[nc_hashsz])
				hp = nc_hash;
		} while (hp->hash_next == (ncache_t *)hp && hp != start_hp);

		/* return if all hash queues are empty. */
		if (hp->hash_next == (ncache_t *)hp) {
			dnlc_reduce_idle = 1;
			return;
		}

		mutex_enter(&hp->hash_lock);
		/* Scan backwards from the tail (least recently moved). */
		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
		    ncp = ncp->hash_prev, cnt++) {
			vp = ncp->vp;
			/*
			 * A name cache entry with a reference count
			 * of one is only referenced by the dnlc.
			 * Also negative cache entries are purged first.
			 */
			if (!vn_has_cached_data(vp) &&
			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
				ncs.ncs_pick_heur.value.ui64++;
				goto found;
			}
			/*
			 * Remove from the end of the chain if the
			 * chain is too long
			 */
			if (cnt > dnlc_long_chain) {
				ncp = hp->hash_prev;
				ncs.ncs_pick_last.value.ui64++;
				vp = ncp->vp;
				goto found;
			}
		}
		/* check for race and continue */
		if (hp->hash_next == (ncache_t *)hp) {
			mutex_exit(&hp->hash_lock);
			continue;
		}

		ncp = hp->hash_prev; /* pick the last one in the hash queue */
		ncs.ncs_pick_last.value.ui64++;
		vp = ncp->vp;
found:
		/*
		 * Remove from hash chain.
		 */
		nc_rmhash(ncp);
		/* Drop the chain lock before releasing the vnode holds. */
		mutex_exit(&hp->hash_lock);
		VN_RELE_DNLC(vp);
		VN_RELE_DNLC(ncp->dp);
		dnlc_free(ncp);
	} while (dnlc_nentries > low_water);

	/* Remember the scan position; mark the taskq slot free again. */
	dnlc_free_rotor = hp;
	dnlc_reduce_idle = 1;
}
1091 1094  
1092 1095  /*
1093 1096   * Directory caching routines
1094 1097   * ==========================
1095 1098   *
1096 1099   * See dnlc.h for details of the interfaces below.
1097 1100   */
1098 1101  
1099 1102  /*
1100 1103   * Lookup up an entry in a complete or partial directory cache.
1101 1104   */
1102 1105  dcret_t
1103 1106  dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
1104 1107  {
1105 1108          dircache_t *dcp;
1106 1109          dcentry_t *dep;
1107 1110          int hash;
1108 1111          int ret;
1109 1112          uchar_t namlen;
1110 1113  
1111 1114          /*
1112 1115           * can test without lock as we are only a cache
1113 1116           */
1114 1117          if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1115 1118                  ncs.ncs_dir_misses.value.ui64++;
1116 1119                  return (DNOCACHE);
1117 1120          }
1118 1121  
1119 1122          if (!dnlc_dir_enable) {
1120 1123                  return (DNOCACHE);
1121 1124          }
1122 1125  
1123 1126          mutex_enter(&dcap->dca_lock);
1124 1127          dcp = (dircache_t *)dcap->dca_dircache;
1125 1128          if (VALID_DIR_CACHE(dcp)) {
1126 1129                  dcp->dc_actime = ddi_get_lbolt64();
1127 1130                  DNLC_DIR_HASH(name, hash, namlen);
1128 1131                  dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1129 1132                  while (dep != NULL) {
1130 1133                          if ((dep->de_hash == hash) &&
1131 1134                              (namlen == dep->de_namelen) &&
1132 1135                              bcmp(dep->de_name, name, namlen) == 0) {
1133 1136                                  *handle = dep->de_handle;
1134 1137                                  mutex_exit(&dcap->dca_lock);
1135 1138                                  ncs.ncs_dir_hits.value.ui64++;
1136 1139                                  return (DFOUND);
1137 1140                          }
1138 1141                          dep = dep->de_next;
1139 1142                  }
1140 1143                  if (dcp->dc_complete) {
1141 1144                          ret = DNOENT;
1142 1145                  } else {
1143 1146                          ret = DNOCACHE;
1144 1147                  }
1145 1148                  mutex_exit(&dcap->dca_lock);
1146 1149                  return (ret);
1147 1150          } else {
1148 1151                  mutex_exit(&dcap->dca_lock);
1149 1152                  ncs.ncs_dir_misses.value.ui64++;
1150 1153                  return (DNOCACHE);
1151 1154          }
1152 1155  }
1153 1156  
1154 1157  /*
1155 1158   * Start a new directory cache. An estimate of the number of
 * entries is provided as a quick check to ensure the directory
1157 1160   * is cacheable.
1158 1161   */
dcret_t
dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
{
	dircache_t *dcp;

	/* Too small to be worth caching, or caching disabled. */
	if (!dnlc_dir_enable ||
	    (num_entries < dnlc_dir_min_size)) {
		return (DNOCACHE);
	}

	if (num_entries > dnlc_dir_max_size) {
		return (DTOOBIG);
	}

	/* Lock order: global chain lock, then the anchor's lock. */
	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);

	/*
	 * A previous attempt failed for lack of memory; clear the
	 * marker and report DNOMEM once to this caller.
	 */
	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
		dcap->dca_dircache = NULL;
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOMEM);
	}

	/*
	 * Check if there's currently a cache.
	 * This probably only occurs on a race.
	 */
	if (dcap->dca_dircache != NULL) {
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOCACHE);
	}

	/*
	 * Allocate the dircache struct, entry and free space hash tables.
	 * These tables are initially just one entry but dynamically resize
	 * when entries and free space are added or removed.
	 */
	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}

	dcp->dc_anchor = dcap; /* set back pointer to anchor */
	dcap->dca_dircache = dcp;

	/* add into head of global chain */
	dcp->dc_next = dc_head.dch_next;
	dcp->dc_prev = (dircache_t *)&dc_head;
	dcp->dc_next->dc_prev = dcp;
	dc_head.dch_next = dcp;

	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_cur_dirs.value.ui64++;
	ncs.ncs_dirs_cached.value.ui64++;
	return (DOK);
error:
	/* Unwind partial allocations; dcp may be NULL here. */
	if (dcp != NULL) {
		if (dcp->dc_namehash) {
			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
		}
		kmem_free(dcp, sizeof (dircache_t));
	}
	/*
	 * Must also kmem_free dcp->dc_freehash if more error cases are added
	 */
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_dir_start_nm.value.ui64++;
	return (DNOCACHE);
}
1239 1242  
1240 1243  /*
 * Add a directory entry to a partial or complete directory cache.
1242 1245   */
dcret_t
dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
{
	dircache_t *dcp;
	dcentry_t **hp, *dep;
	int hash;
	uint_t capacity;
	uchar_t namlen;

	/*
	 * Allocate the dcentry struct, including the variable
	 * size name. Note, the null terminator is not copied.
	 *
	 * We do this outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	DNLC_DIR_HASH(name, hash, namlen);
	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
	if (dep == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This is done not to cover an error but purely for
		 * performance running a debug kernel.
		 * This random error only occurs in debug mode.
		 */
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * called with.
		 */
		dnlc_dir_reclaim(NULL);
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep == NULL) {
			/*
			 * still no memory, better delete this cache
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				/* Mark so the next start reports DNOMEM. */
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}
#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * If the total number of entries goes above the max
		 * then free this cache
		 */
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_entries++;
		/*
		 * Grow the name hash table when the entry count crosses
		 * the resize threshold derived from the current capacity.
		 */
		capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
		if (dcp->dc_num_entries >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_nhash(dcp);
		}
		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];

		/*
		 * Initialise and chain in new entry
		 */
		dep->de_handle = handle;
		dep->de_hash = hash;
		/*
		 * Note de_namelen is a uchar_t to conserve space
		 * and alignment padding. The max length of any
		 * pathname component is defined as MAXNAMELEN
		 * which is 256 (including the terminating null).
		 * So provided this doesn't change, we don't include the null,
		 * we always use bcmp to compare strings, and we don't
		 * start storing full names, then we are ok.
		 * The space savings is worth it.
		 */
		dep->de_namelen = namlen;
		bcopy(name, dep->de_name, namlen);
		/* Link at the head of the hash chain. */
		dep->de_next = *hp;
		*hp = dep;
		dcp->dc_actime = ddi_get_lbolt64();
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		/* Cache went away while we were allocating. */
		mutex_exit(&dcap->dca_lock);
		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
		return (DNOCACHE);
	}
}
1355 1358  
/*
 * Add free space to a partial or complete directory cache.
 * Records "len" bytes of directory space, identified by the
 * caller-supplied cookie "handle", in the anchor's free-space hash.
 * Returns DOK, DTOOBIG or DNOCACHE.
 */
dcret_t
dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
{
	dircache_t *dcp;
	dcfree_t *dfp, **hp;
	uint_t capacity;

	/*
	 * We kmem_alloc outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
	if (dfp == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This random error only occurs in debug mode
		 */
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * called with.
		 */
		dnlc_dir_reclaim(NULL);
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp == NULL) {
			/*
			 * still no memory, better delete this cache.
			 * Mark the anchor DC_RET_LOW_MEM so a later
			 * dnlc_dir_start() sees the low-memory state.
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}

#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * Purge the whole cache if adding this entry would push
		 * the total (name entries + free entries) past the
		 * dnlc_dir_max_size tunable.
		 */
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_cache_free(dnlc_dir_space_cache, dfp);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_free++;
		/*
		 * Grow the free-space hash table if the entry count has
		 * crossed the resize threshold for the current capacity.
		 */
		capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
		if (dcp->dc_num_free >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_fhash(dcp);
		}
		/*
		 * Initialise and chain a new entry
		 */
		dfp->df_handle = handle;
		dfp->df_len = len;
		dcp->dc_actime = ddi_get_lbolt64();
		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
		dfp->df_next = *hp;
		*hp = dfp;
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		/* no valid cache on this anchor; drop the unused entry */
		mutex_exit(&dcap->dca_lock);
		kmem_cache_free(dnlc_dir_space_cache, dfp);
		return (DNOCACHE);
	}
}
1447 1450  
1448 1451  /*
1449 1452   * Mark a directory cache as complete.
1450 1453   */
1451 1454  void
1452 1455  dnlc_dir_complete(dcanchor_t *dcap)
1453 1456  {
1454 1457          dircache_t *dcp;
1455 1458  
1456 1459          mutex_enter(&dcap->dca_lock);
1457 1460          dcp = (dircache_t *)dcap->dca_dircache;
1458 1461          if (VALID_DIR_CACHE(dcp)) {
1459 1462                  dcp->dc_complete = B_TRUE;
1460 1463          }
1461 1464          mutex_exit(&dcap->dca_lock);
1462 1465  }
1463 1466  
/*
 * Internal routine to delete a partial or full directory cache.
 * No additional locking needed.
 * NOTE(review): visible callers (dnlc_dir_purge, dnlc_dir_fini,
 * dnlc_dir_reclaim) unchain dcp from the anchor and global list and
 * drop their locks before calling here.
 */
static void
dnlc_dir_abort(dircache_t *dcp)
{
	dcentry_t *dep, *nhp;
	dcfree_t *fep, *fhp;
	uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
	uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
	uint_t i;

	/*
	 * Free up the cached name entries and hash table
	 */
	for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
		nhp = dcp->dc_namehash[i];
		while (nhp != NULL) { /* for each chained entry */
			dep = nhp->de_next;
			/* entries are variable-length: header + name */
			kmem_free(nhp, sizeof (dcentry_t) - 1 +
			    nhp->de_namelen);
			nhp = dep;
		}
	}
	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);

	/*
	 * Free up the free space entries and hash table
	 */
	for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
		fhp = dcp->dc_freehash[i];
		while (fhp != NULL) { /* for each chained entry */
			fep = fhp->df_next;
			kmem_cache_free(dnlc_dir_space_cache, fhp);
			fhp = fep;
		}
	}
	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);

	/*
	 * Finally free the directory cache structure itself
	 * (and account for all the entries just released)
	 */
	ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
	    dcp->dc_num_free);
	kmem_free(dcp, sizeof (dircache_t));
	ncs.ncs_cur_dirs.value.ui64--;
}
1512 1515  
/*
 * Remove a partial or complete directory cache
 */
void
dnlc_dir_purge(dcanchor_t *dcap)
{
	dircache_t *dcp;

	/*
	 * Lock order: the global chain lock (dc_head.dch_lock) is taken
	 * before the per-anchor lock (dca_lock), as in dnlc_dir_fini().
	 */
	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (!VALID_DIR_CACHE(dcp)) {
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return;
	}
	dcap->dca_dircache = NULL;
	/*
	 * Unchain from global list
	 */
	dcp->dc_prev->dc_next = dcp->dc_next;
	dcp->dc_next->dc_prev = dcp->dc_prev;
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	/* free the (now unreachable) cache after dropping both locks */
	dnlc_dir_abort(dcp);
}
1539 1542  
/*
 * Remove an entry from a complete or partial directory cache.
 * Return the handle if it's non null.
 * Returns DFOUND, DNOENT (complete cache, name absent) or DNOCACHE.
 */
dcret_t
dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
{
	dircache_t *dcp;
	dcentry_t **prevpp, *te;
	uint_t capacity;
	int hash;
	int ret;
	uchar_t namlen;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		/*
		 * Shrink the name hash table if occupancy has dropped
		 * below the resize threshold for its current capacity.
		 */
		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_nhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_entries <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_nhash(dcp);
			}
		}
		DNLC_DIR_HASH(name, hash, namlen);
		/* walk the hash chain via link pointers so we can unchain */
		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (*prevpp != NULL) {
			if (((*prevpp)->de_hash == hash) &&
			    (namlen == (*prevpp)->de_namelen) &&
			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
				if (handlep != NULL) {
					*handlep = (*prevpp)->de_handle;
				}
				te = *prevpp;
				*prevpp = (*prevpp)->de_next;
				kmem_free(te, sizeof (dcentry_t) - 1 +
				    te->de_namelen);

				/*
				 * If the total number of entries
				 * falls below half the minimum number
				 * of entries then free this cache.
				 */
				if (--dcp->dc_num_entries <
				    (dnlc_dir_min_size >> 1)) {
					mutex_exit(&dcap->dca_lock);
					dnlc_dir_purge(dcap);
				} else {
					mutex_exit(&dcap->dca_lock);
				}
				ncs.ncs_dir_num_ents.value.ui64--;
				return (DFOUND);
			}
			prevpp = &((*prevpp)->de_next);
		}
		if (dcp->dc_complete) {
			/* complete cache: a miss means the name is absent */
			ncs.ncs_dir_reme_fai.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}
1614 1617  
1615 1618  
/*
 * Remove free space of at least the given length from a complete
 * or partial directory cache.
 * On success the removed entry's handle is stored through "handlep".
 * Returns DFOUND, DNOENT (complete cache, no fit) or DNOCACHE.
 */
dcret_t
dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
{
	dircache_t *dcp;
	dcfree_t **prevpp, *tfp;
	uint_t fhtsize; /* free hash table size */
	uint_t i;
	uint_t capacity;
	int ret;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		/*
		 * Shrink the free-space hash table if occupancy has
		 * dropped below the resize threshold.
		 */
		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_fhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_free <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_fhash(dcp);
			}
		}
		/*
		 * Search for an entry of the appropriate size
		 * on a first fit basis.
		 */
		fhtsize = dcp->dc_fhash_mask + 1;
		for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
			prevpp = &(dcp->dc_freehash[i]);
			while (*prevpp != NULL) {
				if ((*prevpp)->df_len >= len) {
					*handlep = (*prevpp)->df_handle;
					tfp = *prevpp;
					*prevpp = (*prevpp)->df_next;
					dcp->dc_num_free--;
					mutex_exit(&dcap->dca_lock);
					/* free outside the anchor lock */
					kmem_cache_free(dnlc_dir_space_cache,
					    tfp);
					ncs.ncs_dir_num_ents.value.ui64--;
					return (DFOUND);
				}
				prevpp = &((*prevpp)->df_next);
			}
		}
		if (dcp->dc_complete) {
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}
1680 1683  
1681 1684  /*
1682 1685   * Remove free space with the given handle from a complete or partial
1683 1686   * directory cache.
1684 1687   */
1685 1688  dcret_t
1686 1689  dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1687 1690  {
1688 1691          dircache_t *dcp;
1689 1692          dcfree_t **prevpp, *tfp;
1690 1693          uint_t capacity;
1691 1694          int ret;
1692 1695  
1693 1696          if (!dnlc_dir_enable) {
1694 1697                  return (DNOCACHE);
1695 1698          }
1696 1699  
1697 1700          mutex_enter(&dcap->dca_lock);
1698 1701          dcp = (dircache_t *)dcap->dca_dircache;
1699 1702          if (VALID_DIR_CACHE(dcp)) {
1700 1703                  dcp->dc_actime = ddi_get_lbolt64();
1701 1704                  if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1702 1705                          capacity = (dcp->dc_fhash_mask + 1) <<
1703 1706                              dnlc_dir_hash_size_shift;
1704 1707                          if (dcp->dc_num_free <=
1705 1708                              (capacity >> dnlc_dir_hash_resize_shift)) {
1706 1709                                  dnlc_dir_adjust_fhash(dcp);
1707 1710                          }
1708 1711                  }
1709 1712  
1710 1713                  /*
1711 1714                   * search for the exact entry
1712 1715                   */
1713 1716                  prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1714 1717                  while (*prevpp != NULL) {
1715 1718                          if ((*prevpp)->df_handle == handle) {
1716 1719                                  tfp = *prevpp;
1717 1720                                  *prevpp = (*prevpp)->df_next;
1718 1721                                  dcp->dc_num_free--;
1719 1722                                  mutex_exit(&dcap->dca_lock);
1720 1723                                  kmem_cache_free(dnlc_dir_space_cache, tfp);
1721 1724                                  ncs.ncs_dir_num_ents.value.ui64--;
1722 1725                                  return (DFOUND);
1723 1726                          }
1724 1727                          prevpp = &((*prevpp)->df_next);
1725 1728                  }
1726 1729                  if (dcp->dc_complete) {
1727 1730                          ncs.ncs_dir_rems_fai.value.ui64++;
1728 1731                          ret = DNOENT;
1729 1732                  } else {
1730 1733                          ret = DNOCACHE;
1731 1734                  }
1732 1735                  mutex_exit(&dcap->dca_lock);
1733 1736                  return (ret);
1734 1737          } else {
1735 1738                  mutex_exit(&dcap->dca_lock);
1736 1739                  return (DNOCACHE);
1737 1740          }
1738 1741  }
1739 1742  
1740 1743  /*
1741 1744   * Update the handle of an directory cache entry.
1742 1745   */
1743 1746  dcret_t
1744 1747  dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
1745 1748  {
1746 1749          dircache_t *dcp;
1747 1750          dcentry_t *dep;
1748 1751          int hash;
1749 1752          int ret;
1750 1753          uchar_t namlen;
1751 1754  
1752 1755          if (!dnlc_dir_enable) {
1753 1756                  return (DNOCACHE);
1754 1757          }
1755 1758  
1756 1759          mutex_enter(&dcap->dca_lock);
1757 1760          dcp = (dircache_t *)dcap->dca_dircache;
1758 1761          if (VALID_DIR_CACHE(dcp)) {
1759 1762                  dcp->dc_actime = ddi_get_lbolt64();
1760 1763                  DNLC_DIR_HASH(name, hash, namlen);
1761 1764                  dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1762 1765                  while (dep != NULL) {
1763 1766                          if ((dep->de_hash == hash) &&
1764 1767                              (namlen == dep->de_namelen) &&
1765 1768                              bcmp(dep->de_name, name, namlen) == 0) {
1766 1769                                  dep->de_handle = handle;
1767 1770                                  mutex_exit(&dcap->dca_lock);
1768 1771                                  return (DFOUND);
1769 1772                          }
1770 1773                          dep = dep->de_next;
1771 1774                  }
1772 1775                  if (dcp->dc_complete) {
1773 1776                          ncs.ncs_dir_upd_fail.value.ui64++;
1774 1777                          ret = DNOENT;
1775 1778                  } else {
1776 1779                          ret = DNOCACHE;
1777 1780                  }
1778 1781                  mutex_exit(&dcap->dca_lock);
1779 1782                  return (ret);
1780 1783          } else {
1781 1784                  mutex_exit(&dcap->dca_lock);
1782 1785                  return (DNOCACHE);
1783 1786          }
1784 1787  }
1785 1788  
/*
 * Tear down a directory cache anchor: release any attached cache and
 * destroy the anchor's lock.
 */
void
dnlc_dir_fini(dcanchor_t *dcap)
{
	dircache_t *dcp;

	/* Lock order: global chain lock before the per-anchor lock */
	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * Unchain from global list
		 */
		ncs.ncs_dir_finipurg.value.ui64++;
		dcp->dc_prev->dc_next = dcp->dc_next;
		dcp->dc_next->dc_prev = dcp->dc_prev;
	} else {
		dcp = NULL;
	}
	dcap->dca_dircache = NULL;
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	mutex_destroy(&dcap->dca_lock);
	if (dcp) {
		/* free the unchained cache after dropping all locks */
		dnlc_dir_abort(dcp);
	}
}
1812 1815  
/*
 * Reclaim callback for dnlc directory caching.
 * Invoked by the kernel memory allocator when memory gets tight.
 * This is a pretty serious condition and can easily lead to system
 * hangs if not enough space is returned.
 *
 * Deciding which directory (or directories) to purge is tricky.
 * Purging everything is an overkill, but purging just the oldest used
 * was found to lead to hangs. The largest cached directories use the
 * most memory, but take the most effort to rebuild, whereas the smaller
 * ones have little value and give back little space. So what to do?
 *
 * The current policy is to continue purging the oldest used directories
 * until at least dnlc_dir_min_reclaim directory entries have been purged.
 */
/*ARGSUSED*/
static void
dnlc_dir_reclaim(void *unused)
{
	dircache_t *dcp, *oldest;
	uint_t dirent_cnt = 0;

	/* dch_lock is held across the whole scan-and-purge loop */
	mutex_enter(&dc_head.dch_lock);
	while (dirent_cnt < dnlc_dir_min_reclaim) {
		/*
		 * Linear scan of the global chain for the least
		 * recently used (smallest dc_actime) cache.
		 */
		dcp = dc_head.dch_next;
		oldest = NULL;
		while (dcp != (dircache_t *)&dc_head) {
			if (oldest == NULL) {
				oldest = dcp;
			} else {
				if (dcp->dc_actime < oldest->dc_actime) {
					oldest = dcp;
				}
			}
			dcp = dcp->dc_next;
		}
		if (oldest == NULL) {
			/* nothing to delete */
			mutex_exit(&dc_head.dch_lock);
			return;
		}
		/*
		 * remove from directory chain and purge
		 */
		oldest->dc_prev->dc_next = oldest->dc_next;
		oldest->dc_next->dc_prev = oldest->dc_prev;
		mutex_enter(&oldest->dc_anchor->dca_lock);
		/*
		 * If this was the last entry then it must be too large.
		 * Mark it as such by saving a special dircache_t
		 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
		 * will be presented to the caller of dnlc_dir_start()
		 */
		if (oldest->dc_next == oldest->dc_prev) {
			oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
			ncs.ncs_dir_rec_last.value.ui64++;
		} else {
			oldest->dc_anchor->dca_dircache = NULL;
			ncs.ncs_dir_recl_any.value.ui64++;
		}
		mutex_exit(&oldest->dc_anchor->dca_lock);
		dirent_cnt += oldest->dc_num_entries;
		dnlc_dir_abort(oldest);
	}
	mutex_exit(&dc_head.dch_lock);
}
1879 1882  
1880 1883  /*
1881 1884   * Dynamically grow or shrink the size of the name hash table
1882 1885   */
1883 1886  static void
1884 1887  dnlc_dir_adjust_nhash(dircache_t *dcp)
1885 1888  {
1886 1889          dcentry_t **newhash, *dep, **nhp, *tep;
1887 1890          uint_t newsize;
1888 1891          uint_t oldsize;
1889 1892          uint_t newsizemask;
1890 1893          int i;
1891 1894  
1892 1895          /*
1893 1896           * Allocate new hash table
1894 1897           */
1895 1898          newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1896 1899          newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1897 1900          if (newhash == NULL) {
1898 1901                  /*
1899 1902                   * System is short on memory just return
1900 1903                   * Note, the old hash table is still usable.
1901 1904                   * This return is unlikely to repeatedy occur, because
1902 1905                   * either some other directory caches will be reclaimed
1903 1906                   * due to memory shortage, thus freeing memory, or this
1904 1907                   * directory cahe will be reclaimed.
1905 1908                   */
1906 1909                  return;
1907 1910          }
1908 1911          oldsize = dcp->dc_nhash_mask + 1;
1909 1912          dcp->dc_nhash_mask = newsizemask = newsize - 1;
1910 1913  
1911 1914          /*
1912 1915           * Move entries from the old table to the new
1913 1916           */
1914 1917          for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1915 1918                  dep = dcp->dc_namehash[i];
1916 1919                  while (dep != NULL) { /* for each chained entry */
1917 1920                          tep = dep;
1918 1921                          dep = dep->de_next;
1919 1922                          nhp = &newhash[tep->de_hash & newsizemask];
1920 1923                          tep->de_next = *nhp;
1921 1924                          *nhp = tep;
1922 1925                  }
1923 1926          }
1924 1927  
1925 1928          /*
1926 1929           * delete old hash table and set new one in place
1927 1930           */
1928 1931          kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1929 1932          dcp->dc_namehash = newhash;
1930 1933  }
1931 1934  
1932 1935  /*
1933 1936   * Dynamically grow or shrink the size of the free space hash table
1934 1937   */
1935 1938  static void
1936 1939  dnlc_dir_adjust_fhash(dircache_t *dcp)
1937 1940  {
1938 1941          dcfree_t **newhash, *dfp, **nhp, *tfp;
1939 1942          uint_t newsize;
1940 1943          uint_t oldsize;
1941 1944          int i;
1942 1945  
1943 1946          /*
1944 1947           * Allocate new hash table
1945 1948           */
1946 1949          newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1947 1950          newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1948 1951          if (newhash == NULL) {
1949 1952                  /*
1950 1953                   * System is short on memory just return
1951 1954                   * Note, the old hash table is still usable.
1952 1955                   * This return is unlikely to repeatedy occur, because
1953 1956                   * either some other directory caches will be reclaimed
1954 1957                   * due to memory shortage, thus freeing memory, or this
1955 1958                   * directory cahe will be reclaimed.
1956 1959                   */
1957 1960                  return;
1958 1961          }
1959 1962          oldsize = dcp->dc_fhash_mask + 1;
1960 1963          dcp->dc_fhash_mask = newsize - 1;
1961 1964  
1962 1965          /*
1963 1966           * Move entries from the old table to the new
1964 1967           */
1965 1968          for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1966 1969                  dfp = dcp->dc_freehash[i];
1967 1970                  while (dfp != NULL) { /* for each chained entry */
1968 1971                          tfp = dfp;
1969 1972                          dfp = dfp->df_next;
1970 1973                          nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1971 1974                          tfp->df_next = *nhp;
1972 1975                          *nhp = tfp;
1973 1976                  }
1974 1977          }
1975 1978  
1976 1979          /*
1977 1980           * delete old hash table and set new one in place
1978 1981           */
1979 1982          kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1980 1983          dcp->dc_freehash = newhash;
1981 1984  }
  
    | 
      ↓ open down ↓ | 
    1663 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX