1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2015, Joyent, Inc.
  25  * Copyright (c) 2017 by Delphix. All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * University Copyright- Copyright (c) 1982, 1986, 1988
  33  * The Regents of the University of California
  34  * All Rights Reserved
  35  *
  36  * University Acknowledgment- Portions of this document are derived from
  37  * software developed by the University of California, Berkeley, and its
  38  * contributors.
  39  */
  40 
  41 #include <sys/types.h>
  42 #include <sys/systm.h>
  43 #include <sys/param.h>
  44 #include <sys/t_lock.h>
  45 #include <sys/systm.h>
  46 #include <sys/vfs.h>
  47 #include <sys/vnode.h>
  48 #include <sys/dnlc.h>
  49 #include <sys/kmem.h>
  50 #include <sys/cmn_err.h>
  51 #include <sys/vtrace.h>
  52 #include <sys/bitmap.h>
  53 #include <sys/var.h>
  54 #include <sys/sysmacros.h>
  55 #include <sys/kstat.h>
  56 #include <sys/atomic.h>
  57 #include <sys/taskq.h>
  58 
  59 /*
  60  * Directory name lookup cache.
  61  * Based on code originally done by Robert Elz at Melbourne.
  62  *
  63  * Names found by directory scans are retained in a cache
 * for future reference.  Each hash chain is ordered by LRU.
 * The cache is indexed by a hash value obtained from (vp, name),
  66  * where the vp refers to the directory containing the name.
  67  */
  68 
  69 /*
  70  * We want to be able to identify files that are referenced only by the DNLC.
  71  * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
  72  * since multiple DNLC references should only be counted once in v_count. The
  73  * VN_HOLD macro itself is aliased to VN_HOLD_CALLER in this file to help
  74  * differentiate the behaviors.  (Unfortunately it is not possible to #undef
  75  * VN_HOLD and retain VN_HOLD_CALLER. Ideally a Makefile rule would grep
  76  * uncommented C tokens to check that VN_HOLD is referenced only once in this
  77  * file, to define VN_HOLD_CALLER.)
  78  */
  79 #define VN_HOLD_CALLER  VN_HOLD
  80 #define VN_HOLD_DNLC(vp)        {       \
  81         mutex_enter(&(vp)->v_lock);      \
  82         if ((vp)->v_count_dnlc == 0) {       \
  83                 VN_HOLD_LOCKED(vp);     \
  84         }                               \
  85         (vp)->v_count_dnlc++;                \
  86         mutex_exit(&(vp)->v_lock);       \
  87 }
  88 #define VN_RELE_DNLC(vp)        {       \
  89         vn_rele_dnlc(vp);               \
  90 }
  91 
/*
 * Tunable nc_hashavelen is the average length desired for this chain, from
 * which the size of the nc_hash table is derived at create time.
 */
#define NC_HASHAVELEN_DEFAULT   4
int nc_hashavelen = NC_HASHAVELEN_DEFAULT;

/*
 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
 * depth exceeds this value, we move the looked-up entry to the front of
 * its hash chain.  The idea is to make sure that the most frequently
 * accessed entries are found most quickly (by keeping them near the
 * front of their hash chains).
 */
#define NC_MOVETOFRONT  2

/*
 * DNLC_MAX_RELE is used to size an array on the stack when releasing
 * vnodes. This array is used rather than calling VN_RELE() inline because
 * all dnlc locks must be dropped by that time in order to avoid a
 * possible deadlock. This deadlock occurs when the dnlc holds the last
 * reference to the vnode and so the VOP_INACTIVE vector is called which
 * can in turn call back into the dnlc. A global array was used but had
 * many problems:
 *      1) Actually doesn't have an upper bound on the array size as
 *         entries can be added after starting the purge.
 *      2) The locking scheme causes a hang.
 *      3) Caused serialisation on the global lock.
 *      4) The array was often unnecessarily huge.
 *
 * Note the current value 8 allows up to 4 cache entries (to be purged
 * from each hash chain), before having to cycle around and retry.
 * This ought to be ample given that nc_hashavelen is typically very small.
 */
#define DNLC_MAX_RELE   8 /* must be even: releases come in (vp, dp) pairs */
 128 
/*
 * Hash table of name cache entries for fast lookup, dynamically
 * allocated at startup (sized and filled in by dnlc_init()).
 */
nc_hash_t *nc_hash;

/*
 * Rotors. Used to select entries on a round-robin basis.
 */
static nc_hash_t *dnlc_purge_fs1_rotor;
static nc_hash_t *dnlc_free_rotor;

/*
 * # of dnlc entries (uninitialized)
 *
 * the initial value was chosen as being
 * a random string of bits, probably not
 * normally chosen by a systems administrator
 * (-1 is the "unset by administrator" sentinel tested in dnlc_init())
 */
volatile int ncsize = -1;
volatile uint32_t dnlc_nentries = 0;    /* current num of name cache entries */
static int nc_hashsz;                   /* size of hash table */
static int nc_hashmask;                 /* size of hash table minus 1 */

/*
 * The dnlc_reduce_cache() taskq queue is activated when there are
 * ncsize name cache entries and if no parameter is provided, it reduces
 * the size down to dnlc_nentries_low_water, which is by default one
 * hundredth less (or 99%) of ncsize.
 *
 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
 * the size down based on ncsize_onepercent - where ncsize_onepercent
 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
 * the size below 3% of ncsize (ncsize_min_percent).
 */
#define DNLC_LOW_WATER_DIVISOR_DEFAULT 100
uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
uint_t dnlc_nentries_low_water;
int dnlc_reduce_idle = 1; /* no locking needed */
uint_t ncsize_onepercent;
uint_t ncsize_min_percent;

/*
 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
 * then this means the dnlc_reduce_cache() taskq is failing to
 * keep up. In this case we refuse to add new entries to the dnlc
 * until the taskq catches up.
 */
uint_t dnlc_max_nentries; /* twice ncsize */
uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */

/*
 * Tunable to define when we should just remove items from
 * the end of the chain.
 */
#define DNLC_LONG_CHAIN 8
uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
 186 
/*
 * ncstats has been deprecated, due to the integer size of the counters
 * which can easily overflow in the dnlc.
 * It is maintained (at some expense) for compatibility.
 * The preferred interface is the kstat accessible nc_stats below.
 */
struct ncstats ncstats;

/* 64-bit named counters, exported via the "dnlcstats" kstat (dnlc_init()). */
struct nc_stats ncs = {
        { "hits",                       KSTAT_DATA_UINT64 },
        { "misses",                     KSTAT_DATA_UINT64 },
        { "negative_cache_hits",        KSTAT_DATA_UINT64 },
        { "enters",                     KSTAT_DATA_UINT64 },
        { "double_enters",              KSTAT_DATA_UINT64 },
        { "purge_total_entries",        KSTAT_DATA_UINT64 },
        { "purge_all",                  KSTAT_DATA_UINT64 },
        { "purge_vp",                   KSTAT_DATA_UINT64 },
        { "purge_vfs",                  KSTAT_DATA_UINT64 },
        { "purge_fs1",                  KSTAT_DATA_UINT64 },
        { "pick_free",                  KSTAT_DATA_UINT64 },
        { "pick_heuristic",             KSTAT_DATA_UINT64 },
        { "pick_last",                  KSTAT_DATA_UINT64 },

        /* directory caching stats */

        { "dir_hits",                   KSTAT_DATA_UINT64 },
        { "dir_misses",                 KSTAT_DATA_UINT64 },
        { "dir_cached_current",         KSTAT_DATA_UINT64 },
        { "dir_entries_cached_current", KSTAT_DATA_UINT64 },
        { "dir_cached_total",           KSTAT_DATA_UINT64 },
        { "dir_start_no_memory",        KSTAT_DATA_UINT64 },
        { "dir_add_no_memory",          KSTAT_DATA_UINT64 },
        { "dir_add_abort",              KSTAT_DATA_UINT64 },
        { "dir_add_max",                KSTAT_DATA_UINT64 },
        { "dir_remove_entry_fail",      KSTAT_DATA_UINT64 },
        { "dir_remove_space_fail",      KSTAT_DATA_UINT64 },
        { "dir_update_fail",            KSTAT_DATA_UINT64 },
        { "dir_fini_purge",             KSTAT_DATA_UINT64 },
        { "dir_reclaim_last",           KSTAT_DATA_UINT64 },
        { "dir_reclaim_any",            KSTAT_DATA_UINT64 },
};

/* Master on/off switch for the dnlc; cleared in dnlc_init() if ncsize <= 0. */
static int doingcache = 1;

/*
 * Vnode representing negative cache entries; reinitialized in dnlc_init().
 * (Presumably DNLC_NO_VNODE, compared against in dnlc_lookup(), refers to
 * this vnode — declared in a header; verify.)
 */
vnode_t negative_cache_vnode;
 232 
 233 /*
 234  * Insert entry at the front of the queue
 235  */
 236 #define nc_inshash(ncp, hp) \
 237 { \
 238         (ncp)->hash_next = (hp)->hash_next; \
 239         (ncp)->hash_prev = (ncache_t *)(hp); \
 240         (hp)->hash_next->hash_prev = (ncp); \
 241         (hp)->hash_next = (ncp); \
 242 }
 243 
 244 /*
 245  * Remove entry from hash queue
 246  */
 247 #define nc_rmhash(ncp) \
 248 { \
 249         (ncp)->hash_prev->hash_next = (ncp)->hash_next; \
 250         (ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
 251         (ncp)->hash_prev = NULL; \
 252         (ncp)->hash_next = NULL; \
 253 }
 254 
 255 /*
 256  * Free an entry.
 257  */
 258 #define dnlc_free(ncp) \
 259 { \
 260         kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
 261         atomic_dec_32(&dnlc_nentries); \
 262 }
 263 
 264 
 265 /*
 266  * Cached directory info.
 267  * ======================
 268  */
 269 
 270 /*
 271  * Cached directory free space hash function.
 272  * Needs the free space handle and the dcp to get the hash table size
 273  * Returns the hash index.
 274  */
 275 #define DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
 276 
 277 /*
 278  * Cached directory name entry hash function.
 279  * Uses the name and returns in the input arguments the hash and the name
 280  * length.
 281  */
 282 #define DNLC_DIR_HASH(name, hash, namelen)                      \
 283         {                                                       \
 284                 char Xc;                                        \
 285                 const char *Xcp;                                \
 286                 hash = *name;                                   \
 287                 for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
 288                         hash = (hash << 4) + hash + Xc;           \
 289                 ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));   \
 290                 namelen = Xcp - (name);                         \
 291         }
 292 
/* special dircache_t pointer to indicate error should be returned */
/*
 * The anchor directory cache pointer can contain 3 types of values,
 * 1) NULL: No directory cache
 * 2) DC_RET_LOW_MEM (the sentinel defined below): there was a directory
 *    cache that was found to be too big, or a memory shortage occurred.
 *    This value remains in the pointer until a dnlc_dir_start(), which
 *    returns a DNOMEM error.
 *    This is kludgy but efficient and only visible in this source file.
 * 3) A valid cache pointer.
 */
#define DC_RET_LOW_MEM (dircache_t *)1
#define VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)

/* Tunables */
volatile uint_t dnlc_dir_enable = 1;    /* disable caching directories by */
                                        /* setting to 0 */
volatile uint_t dnlc_dir_min_size = 40; /* min no of directory entries before */
                                        /* caching */
volatile uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
uint_t dnlc_dir_min_reclaim =  350000; /* approx 1MB of dcentrys */
/*
 * dnlc_dir_hash_resize_shift determines when the hash tables
 * get re-adjusted due to growth or shrinkage
 * - currently 2 indicating that there can be at most 4
 * times or at least one quarter the number of entries
 * before hash table readjustment. Note that with
 * dnlc_dir_hash_size_shift above set at 3 this would
 * mean readjustment would occur if the average number
 * of entries went above 32 or below 2
 */
uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */

static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
static dchead_t dc_head; /* anchor of cached directories */

/* Prototypes */
static ncache_t *dnlc_get(uchar_t namlen);
static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
    int hash);
static void dnlc_dir_reclaim(void *unused);
static void dnlc_dir_abort(dircache_t *dcp);
static void dnlc_dir_adjust_fhash(dircache_t *dcp);
static void dnlc_dir_adjust_nhash(dircache_t *dcp);
static void do_dnlc_reduce_cache(void *);
 338 
 339 
/*
 * Initialize the directory name lookup cache: size the cache, build the
 * hash table, create the directory-cache kmem cache (with its memory
 * reclaim callback), and install the kstats.  Called once at boot.
 */
void
dnlc_init()
{
        nc_hash_t *hp;
        kstat_t *ksp;
        int i;

        /*
         * Set up the size of the dnlc (ncsize) and its low water mark.
         * ncsize == -1 means the administrator did not set it, so derive
         * a default from the system size (v.v_proc and maxusers).
         */
        if (ncsize == -1) {
                /* calculate a reasonable size for the low water */
                dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
                ncsize = dnlc_nentries_low_water +
                    (dnlc_nentries_low_water / dnlc_low_water_divisor);
        } else {
                /* don't change the user specified ncsize */
                dnlc_nentries_low_water =
                    ncsize - (ncsize / dnlc_low_water_divisor);
        }
        if (ncsize <= 0) {
                /* an explicit ncsize of 0 (or less) disables the dnlc */
                doingcache = 0;
                dnlc_dir_enable = 0; /* also disable directory caching */
                ncsize = 0;
                cmn_err(CE_NOTE, "name cache (dnlc) disabled");
                return;
        }
        dnlc_max_nentries = ncsize * 2;         /* hard ceiling on entries */
        ncsize_onepercent = ncsize / 100;
        ncsize_min_percent = ncsize_onepercent * 3;

        /*
         * Initialise the hash table.
         * Compute hash size rounding to the next power of two.
         */
        nc_hashsz = ncsize / nc_hashavelen;
        nc_hashsz = 1 << highbit(nc_hashsz);
        nc_hashmask = nc_hashsz - 1;
        nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
        for (i = 0; i < nc_hashsz; i++) {
                /* each bucket starts as an empty circular chain */
                hp = (nc_hash_t *)&nc_hash[i];
                mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
                hp->hash_next = (ncache_t *)hp;
                hp->hash_prev = (ncache_t *)hp;
        }

        /*
         * Initialize rotors
         */
        dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];

        /*
         * Set up the directory caching to use kmem_cache_alloc
         * for its free space entries so that we can get a callback
         * when the system is short on memory, to allow us to free
         * up some memory. we don't use the constructor/deconstructor
         * functions.
         */
        dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
            sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
            NULL, 0);

        /*
         * Initialise the head of the cached directory structures
         */
        mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
        dc_head.dch_next = (dircache_t *)&dc_head;
        dc_head.dch_prev = (dircache_t *)&dc_head;

        /*
         * Put a hold on the negative cache vnode so that it never goes away
         * (VOP_INACTIVE isn't called on it).
         * NOTE(review): only vn_reinit() is visible here; confirm it (or
         * the initial v_count it sets) provides the hold described above.
         */
        vn_reinit(&negative_cache_vnode);

        /*
         * Initialise kstats - both the old compatibility raw kind and
         * the more extensive named stats.
         */
        ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
            sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &ncstats;
                kstat_install(ksp);
        }
        ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
            sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &ncs;
                kstat_install(ksp);
        }
}
 435 
/*
 * Add a name to the directory name cache: create an entry mapping
 * (dp, name) -> vp.  If an entry for the name already exists it is left
 * untouched (see dnlc_update() for the replacing variant).
 */
void
dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
{
        ncache_t *ncp;
        nc_hash_t *hp;
        uchar_t namlen;
        int hash;

        TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");

        if (!doingcache) {
                TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
                    "dnlc_enter_end:(%S) %d", "not caching", 0);
                return;
        }

        /*
         * Get a new dnlc entry. Assume the entry won't be in the cache
         * and initialize it now (holds and the name copy are done before
         * taking the hash lock, to keep the locked section short).
         */
        DNLCHASH(name, dp, hash, namlen);
        if ((ncp = dnlc_get(namlen)) == NULL)
                return;         /* no entry available; silently skip caching */
        ncp->dp = dp;
        VN_HOLD_DNLC(dp);
        ncp->vp = vp;
        VN_HOLD_DNLC(vp);
        bcopy(name, ncp->name, namlen + 1); /* name and null */
        ncp->hash = hash;
        hp = &nc_hash[hash & nc_hashmask];

        mutex_enter(&hp->hash_lock);
        if (dnlc_search(dp, name, namlen, hash) != NULL) {
                /*
                 * Someone else entered this name first: drop the lock,
                 * then undo our holds and free the unused entry.
                 */
                mutex_exit(&hp->hash_lock);
                ncstats.dbl_enters++;
                ncs.ncs_dbl_enters.value.ui64++;
                VN_RELE_DNLC(dp);
                VN_RELE_DNLC(vp);
                dnlc_free(ncp);         /* discard the unused entry */
                TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
                    "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
                return;
        }
        /*
         * Insert back into the hash chain.
         */
        nc_inshash(ncp, hp);
        mutex_exit(&hp->hash_lock);
        ncstats.enters++;
        ncs.ncs_enters.value.ui64++;
        TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
            "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
}
 492 
/*
 * Add a name to the directory cache.
 *
 * This function is basically identical with
 * dnlc_enter().  The difference is that when the
 * desired dnlc entry is found, the vnode in the
 * ncache is compared with the vnode passed in.
 *
 * If they are not equal then the ncache is
 * updated with the passed in vnode.  Otherwise
 * it just frees up the newly allocated dnlc entry.
 */
void
dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
{
        ncache_t *ncp;
        ncache_t *tcp;
        vnode_t *tvp;
        nc_hash_t *hp;
        int hash;
        uchar_t namlen;

        TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");

        if (!doingcache) {
                TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
                    "dnlc_update_end:(%S) %d", "not caching", 0);
                return;
        }

        /*
         * Get a new dnlc entry and initialize it now.
         * If we fail to get a new entry, call dnlc_remove() to purge
         * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
         * entry.
         * Failure to clear an existing entry could result in false dnlc
         * lookup (negative/stale entry).
         */
        DNLCHASH(name, dp, hash, namlen);
        if ((ncp = dnlc_get(namlen)) == NULL) {
                dnlc_remove(dp, name);
                return;
        }
        /* take the DNLC holds before publishing the entry */
        ncp->dp = dp;
        VN_HOLD_DNLC(dp);
        ncp->vp = vp;
        VN_HOLD_DNLC(vp);
        bcopy(name, ncp->name, namlen + 1); /* name and null */
        ncp->hash = hash;
        hp = &nc_hash[hash & nc_hashmask];

        mutex_enter(&hp->hash_lock);
        if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
                if (tcp->vp != vp) {
                        /*
                         * Existing entry maps to a different vnode:
                         * swap in the new vnode under the hash lock,
                         * then release the old one with the lock dropped.
                         */
                        tvp = tcp->vp;
                        tcp->vp = vp;
                        mutex_exit(&hp->hash_lock);
                        VN_RELE_DNLC(tvp);
                        ncstats.enters++;
                        ncs.ncs_enters.value.ui64++;
                        TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
                            "dnlc_update_end:(%S) %d", "done", ncstats.enters);
                } else {
                        /* entry already maps to vp; drop our extra hold */
                        mutex_exit(&hp->hash_lock);
                        VN_RELE_DNLC(vp);
                        ncstats.dbl_enters++;
                        ncs.ncs_dbl_enters.value.ui64++;
                        TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
                            "dnlc_update_end:(%S) %d",
                            "dbl enter", ncstats.dbl_enters);
                }
                /* in either case the preallocated entry is not needed */
                VN_RELE_DNLC(dp);
                dnlc_free(ncp);         /* discard the unused entry */
                return;
        }
        /*
         * insert the new entry, since it is not in dnlc yet
         */
        nc_inshash(ncp, hp);
        mutex_exit(&hp->hash_lock);
        ncstats.enters++;
        ncs.ncs_enters.value.ui64++;
        TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
            "dnlc_update_end:(%S) %d", "done", ncstats.enters);
}
 578 
/*
 * Look up a name in the directory name cache.
 *
 * Return a doubly-held vnode if found: one hold so that it may
 * remain in the cache for other users, the other hold so that
 * the cache is not re-cycled and the identity of the vnode is
 * lost before the caller can use the vnode.
 */
vnode_t *
dnlc_lookup(vnode_t *dp, const char *name)
{
        ncache_t *ncp;
        nc_hash_t *hp;
        vnode_t *vp;
        int hash, depth;
        uchar_t namlen;

        TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
            "dnlc_lookup_start:dp %x name %s", dp, name);

        if (!doingcache) {
                TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
                    "dnlc_lookup_end:%S %d vp %x name %s",
                    "not_caching", 0, NULL, name);
                return (NULL);
        }

        DNLCHASH(name, dp, hash, namlen);
        depth = 1;
        hp = &nc_hash[hash & nc_hashmask];
        mutex_enter(&hp->hash_lock);

        /* walk the circular chain; the bucket head terminates the scan */
        for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
            ncp = ncp->hash_next) {
                if (ncp->hash == hash &&     /* fast signature check */
                    ncp->dp == dp &&
                    ncp->namlen == namlen &&
                    bcmp(ncp->name, name, namlen) == 0) {
                        /*
                         * Move this entry to the head of its hash chain
                         * if it's not already close (i.e. found at a
                         * depth greater than NC_MOVETOFRONT), so that
                         * frequently used entries stay cheap to find.
                         */
                        if (depth > NC_MOVETOFRONT) {
                                ncache_t *next = ncp->hash_next;
                                ncache_t *prev = ncp->hash_prev;

                                /* unlink from the current position ... */
                                prev->hash_next = next;
                                next->hash_prev = prev;
                                /* ... and relink at the head of the chain */
                                ncp->hash_next = next = hp->hash_next;
                                ncp->hash_prev = (ncache_t *)hp;
                                next->hash_prev = ncp;
                                hp->hash_next = ncp;

                                ncstats.move_to_front++;
                        }

                        /*
                         * Put a hold on the vnode now so its identity
                         * can't change before the caller has a chance to
                         * put a hold on it.
                         */
                        vp = ncp->vp;
                        VN_HOLD_CALLER(vp);
                        mutex_exit(&hp->hash_lock);
                        ncstats.hits++;
                        ncs.ncs_hits.value.ui64++;
                        if (vp == DNLC_NO_VNODE) {
                                /* negative cache hit: name known absent */
                                ncs.ncs_neg_hits.value.ui64++;
                        }
                        TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
                            "dnlc_lookup_end:%S %d vp %x name %s", "hit",
                            ncstats.hits, vp, name);
                        return (vp);
                }
                depth++;
        }

        mutex_exit(&hp->hash_lock);
        ncstats.misses++;
        ncs.ncs_misses.value.ui64++;
        TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
            "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
            NULL, name);
        return (NULL);
}
 664 
 665 /*
 666  * Remove an entry in the directory name cache.
 667  */
 668 void
 669 dnlc_remove(vnode_t *dp, const char *name)
 670 {
 671         ncache_t *ncp;
 672         nc_hash_t *hp;
 673         uchar_t namlen;
 674         int hash;
 675 
 676         if (!doingcache)
 677                 return;
 678         DNLCHASH(name, dp, hash, namlen);
 679         hp = &nc_hash[hash & nc_hashmask];
 680 
 681         mutex_enter(&hp->hash_lock);
 682         if (ncp = dnlc_search(dp, name, namlen, hash)) {
 683                 /*
 684                  * Free up the entry
 685                  */
 686                 nc_rmhash(ncp);
 687                 mutex_exit(&hp->hash_lock);
 688                 VN_RELE_DNLC(ncp->vp);
 689                 VN_RELE_DNLC(ncp->dp);
 690                 dnlc_free(ncp);
 691                 return;
 692         }
 693         mutex_exit(&hp->hash_lock);
 694 }
 695 
/*
 * Purge the entire cache, releasing all vnode holds held by the dnlc.
 */
void
dnlc_purge()
{
        nc_hash_t *nch;
        ncache_t *ncp;
        int index;
        int i;
        vnode_t *nc_rele[DNLC_MAX_RELE];        /* deferred vnode releases */

        if (!doingcache)
                return;

        ncstats.purges++;
        ncs.ncs_purge_all.value.ui64++;

        for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
                index = 0;
                mutex_enter(&nch->hash_lock);
                ncp = nch->hash_next;
                while (ncp != (ncache_t *)nch) {
                        ncache_t *np;

                        np = ncp->hash_next;
                        /*
                         * Collect the vnodes instead of releasing them
                         * here: VN_RELE_DNLC() can trigger VOP_INACTIVE,
                         * which may re-enter the dnlc and deadlock on
                         * hash_lock (see the DNLC_MAX_RELE comment).
                         */
                        nc_rele[index++] = ncp->vp;
                        nc_rele[index++] = ncp->dp;

                        nc_rmhash(ncp);
                        dnlc_free(ncp);
                        ncp = np;
                        ncs.ncs_purge_total.value.ui64++;
                        /* rele array full: drain it, then rescan chain */
                        if (index == DNLC_MAX_RELE)
                                break;
                }
                mutex_exit(&nch->hash_lock);

                /* Release holds on all the vnodes now that we have no locks */
                for (i = 0; i < index; i++) {
                        VN_RELE_DNLC(nc_rele[i]);
                }
                if (ncp != (ncache_t *)nch) {
                        nch--; /* Do current hash chain again */
                }
        }
}
 743 
/*
 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
 * reference count goes to zero (the caller still holds a reference).
 */
void
dnlc_purge_vp(vnode_t *vp)
{
        nc_hash_t *nch;
        ncache_t *ncp;
        int index;
        vnode_t *nc_rele[DNLC_MAX_RELE];        /* deferred vnode releases */

        ASSERT(vp->v_count > 0);
        if (vp->v_count_dnlc == 0) {
                /* no dnlc references to purge */
                return;
        }

        if (!doingcache)
                return;

        ncstats.purges++;
        ncs.ncs_purge_vp.value.ui64++;

        for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
                index = 0;
                mutex_enter(&nch->hash_lock);
                ncp = nch->hash_next;
                while (ncp != (ncache_t *)nch) {
                        ncache_t *np;

                        np = ncp->hash_next;
                        /* match entries where vp is the directory or target */
                        if (ncp->dp == vp || ncp->vp == vp) {
                                /*
                                 * Defer the vnode releases until all dnlc
                                 * locks are dropped, to avoid re-entering
                                 * the dnlc via VOP_INACTIVE (see the
                                 * DNLC_MAX_RELE comment).
                                 */
                                nc_rele[index++] = ncp->vp;
                                nc_rele[index++] = ncp->dp;
                                nc_rmhash(ncp);
                                dnlc_free(ncp);
                                ncs.ncs_purge_total.value.ui64++;
                                if (index == DNLC_MAX_RELE) {
                                        /* rele array full: drain and rescan */
                                        ncp = np;
                                        break;
                                }
                        }
                        ncp = np;
                }
                mutex_exit(&nch->hash_lock);

                /* Release holds on all the vnodes now that we have no locks */
                while (index) {
                        VN_RELE_DNLC(nc_rele[--index]);
                }

                if (vp->v_count_dnlc == 0) {
                        /* all dnlc references are gone; we can stop early */
                        return;
                }

                if (ncp != (ncache_t *)nch) {
                        nch--; /* Do current hash chain again */
                }
        }
}
 804 
 805 /*
 806  * Purge cache entries referencing a vfsp.  Caller supplies a count
 807  * of entries to purge; up to that many will be freed.  A count of
 808  * zero indicates that all such entries should be purged.  Returns
 809  * the number of entries that were purged.
 810  */
int
dnlc_purge_vfsp(vfs_t *vfsp, int count)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int n = 0;		/* total entries purged so far */
	int index;		/* fill level of nc_rele[] batch */
	int i;
	vnode_t *nc_rele[DNLC_MAX_RELE];

	if (!doingcache)
		return (0);

	ncstats.purges++;
	ncs.ncs_purge_vfs.value.ui64++;

	/*
	 * Walk each hash chain, removing entries whose directory or
	 * target vnode lives on vfsp.  As in dnlc_purge_vp(), vnode
	 * holds are batched and released only after dropping the
	 * chain lock.
	 */
	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			/* save successor before ncp may be freed */
			np = ncp->hash_next;
			ASSERT(ncp->dp != NULL);
			ASSERT(ncp->vp != NULL);
			if ((ncp->dp->v_vfsp == vfsp) ||
			    (ncp->vp->v_vfsp == vfsp)) {
				n++;
				nc_rele[index++] = ncp->vp;
				nc_rele[index++] = ncp->dp;
				nc_rmhash(ncp);
				dnlc_free(ncp);
				ncs.ncs_purge_total.value.ui64++;
				/* batch full: drain before continuing */
				if (index == DNLC_MAX_RELE) {
					ncp = np;
					break;
				}
				/* requested quota reached */
				if (count != 0 && n >= count) {
					break;
				}
			}
			ncp = np;
		}
		mutex_exit(&nch->hash_lock);
		/* Release holds on all the vnodes now that we have no locks */
		for (i = 0; i < index; i++) {
			VN_RELE_DNLC(nc_rele[i]);
		}
		if (count != 0 && n >= count) {
			return (n);
		}
		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
	return (n);
}
 869 
 870 /*
 871  * Purge 1 entry from the dnlc that is part of the filesystem(s)
 872  * represented by 'vop'. The purpose of this routine is to allow
 873  * users of the dnlc to free a vnode that is being held by the dnlc.
 874  *
 * If we find a vnode that we release which will result in
 * freeing the underlying vnode (count was 1), return 1; return 0
 * if no appropriate vnode is found.
 878  *
 879  * Note, vop is not the 'right' identifier for a filesystem.
 880  */
 881 int
 882 dnlc_fs_purge1(vnodeops_t *vop)
 883 {
 884         nc_hash_t *end;
 885         nc_hash_t *hp;
 886         ncache_t *ncp;
 887         vnode_t *vp;
 888 
 889         if (!doingcache)
 890                 return (0);
 891 
 892         ncs.ncs_purge_fs1.value.ui64++;
 893 
 894         /*
 895          * Scan the dnlc entries looking for a likely candidate.
 896          */
 897         hp = end = dnlc_purge_fs1_rotor;
 898 
 899         do {
 900                 if (++hp == &nc_hash[nc_hashsz])
 901                         hp = nc_hash;
 902                 dnlc_purge_fs1_rotor = hp;
 903                 if (hp->hash_next == (ncache_t *)hp)
 904                         continue;
 905                 mutex_enter(&hp->hash_lock);
 906                 for (ncp = hp->hash_prev;
 907                     ncp != (ncache_t *)hp;
 908                     ncp = ncp->hash_prev) {
 909                         vp = ncp->vp;
 910                         if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
 911                             vn_matchops(vp, vop))
 912                                 break;
 913                 }
 914                 if (ncp != (ncache_t *)hp) {
 915                         nc_rmhash(ncp);
 916                         mutex_exit(&hp->hash_lock);
 917                         VN_RELE_DNLC(ncp->dp);
 918                         VN_RELE_DNLC(vp)
 919                         dnlc_free(ncp);
 920                         ncs.ncs_purge_total.value.ui64++;
 921                         return (1);
 922                 }
 923                 mutex_exit(&hp->hash_lock);
 924         } while (hp != end);
 925         return (0);
 926 }
 927 
 928 /*
 929  * Utility routine to search for a cache entry. Return the
 930  * ncache entry if found, NULL otherwise.
 931  */
 932 static ncache_t *
 933 dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
 934 {
 935         nc_hash_t *hp;
 936         ncache_t *ncp;
 937 
 938         hp = &nc_hash[hash & nc_hashmask];
 939 
 940         for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
 941                 if (ncp->hash == hash &&
 942                     ncp->dp == dp &&
 943                     ncp->namlen == namlen &&
 944                     bcmp(ncp->name, name, namlen) == 0)
 945                         return (ncp);
 946         }
 947         return (NULL);
 948 }
 949 
 950 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
 951 #error ncache_t name length representation is too small
 952 #endif
 953 
 954 void
 955 dnlc_reduce_cache(void *reduce_percent)
 956 {
 957         if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
 958                 dnlc_reduce_idle = 0;
 959                 if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
 960                     reduce_percent, TQ_NOSLEEP)) == NULL)
 961                         dnlc_reduce_idle = 1;
 962         }
 963 }
 964 
 965 /*
 966  * Get a new name cache entry.
 967  * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
 968  * is short then just return NULL. If we're over ncsize then kick off a
 969  * thread to free some in use entries down to dnlc_nentries_low_water.
 970  * Caller must initialise all fields except namlen.
 971  * Component names are defined to be less than MAXNAMELEN
 972  * which includes a null.
 973  */
 974 static ncache_t *
 975 dnlc_get(uchar_t namlen)
 976 {
 977         ncache_t *ncp;
 978 
 979         if (dnlc_nentries > dnlc_max_nentries) {
 980                 dnlc_max_nentries_cnt++; /* keep a statistic */
 981                 return (NULL);
 982         }
 983         ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
 984         if (ncp == NULL) {
 985                 return (NULL);
 986         }
 987         ncp->namlen = namlen;
 988         atomic_inc_32(&dnlc_nentries);
 989         dnlc_reduce_cache(NULL);
 990         return (ncp);
 991 }
 992 
 993 /*
 994  * Taskq routine to free up name cache entries to reduce the
 995  * cache size to the low water mark if "reduce_percent" is not provided.
 996  * If "reduce_percent" is provided, reduce cache size by
 997  * (ncsize_onepercent * reduce_percent).
 998  */
 999 /*ARGSUSED*/
static void
do_dnlc_reduce_cache(void *reduce_percent)
{
	nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
	vnode_t *vp;
	ncache_t *ncp;
	int cnt;
	uint_t low_water = dnlc_nentries_low_water;

	/* An explicit percentage overrides the default low water mark. */
	if (reduce_percent) {
		uint_t reduce_cnt;

		/*
		 * Never try to reduce the current number
		 * of cache entries below 3% of ncsize.
		 */
		if (dnlc_nentries <= ncsize_min_percent) {
			dnlc_reduce_idle = 1;
			return;
		}
		reduce_cnt = ncsize_onepercent *
		    (uint_t)(uintptr_t)reduce_percent;

		/* clamp so we never target fewer than 3% of ncsize */
		if (reduce_cnt > dnlc_nentries ||
		    dnlc_nentries - reduce_cnt < ncsize_min_percent)
			low_water = ncsize_min_percent;
		else
			low_water = dnlc_nentries - reduce_cnt;
	}

	do {
		/*
		 * Find the first non empty hash queue without locking.
		 * Only look at each hash queue once to avoid an infinite loop.
		 */
		do {
			if (++hp == &nc_hash[nc_hashsz])
				hp = nc_hash;
		} while (hp->hash_next == (ncache_t *)hp && hp != start_hp);

		/* return if all hash queues are empty. */
		if (hp->hash_next == (ncache_t *)hp) {
			dnlc_reduce_idle = 1;
			return;
		}

		mutex_enter(&hp->hash_lock);
		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
		    ncp = ncp->hash_prev, cnt++) {
			vp = ncp->vp;
			/*
			 * A name cache entry with a reference count
			 * of one is only referenced by the dnlc.
			 * Also negative cache entries are purged first.
			 */
			if (!vn_has_cached_data(vp) &&
			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
				ncs.ncs_pick_heur.value.ui64++;
				goto found;
			}
			/*
			 * Remove from the end of the chain if the
			 * chain is too long
			 */
			if (cnt > dnlc_long_chain) {
				ncp = hp->hash_prev;
				ncs.ncs_pick_last.value.ui64++;
				vp = ncp->vp;
				goto found;
			}
		}
		/* check for race and continue */
		if (hp->hash_next == (ncache_t *)hp) {
			mutex_exit(&hp->hash_lock);
			continue;
		}

		ncp = hp->hash_prev; /* pick the last one in the hash queue */
		ncs.ncs_pick_last.value.ui64++;
		vp = ncp->vp;
found:
		/*
		 * Remove from hash chain.
		 */
		nc_rmhash(ncp);
		/* drop the chain lock before releasing vnode holds */
		mutex_exit(&hp->hash_lock);
		VN_RELE_DNLC(vp);
		VN_RELE_DNLC(ncp->dp);
		dnlc_free(ncp);
	} while (dnlc_nentries > low_water);

	/* remember where we stopped so the next pass resumes here */
	dnlc_free_rotor = hp;
	dnlc_reduce_idle = 1;
}
1094 
1095 /*
1096  * Directory caching routines
1097  * ==========================
1098  *
1099  * See dnlc.h for details of the interfaces below.
1100  */
1101 
1102 /*
1103  * Lookup up an entry in a complete or partial directory cache.
1104  */
dcret_t
dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
{
	dircache_t *dcp;
	dcentry_t *dep;
	int hash;
	int ret;
	uchar_t namlen;

	/*
	 * can test without lock as we are only a cache
	 */
	if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
		ncs.ncs_dir_misses.value.ui64++;
		return (DNOCACHE);
	}

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	/* re-check the cache pointer under the anchor lock */
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/* touch the access time for the LRU reclaim policy */
		dcp->dc_actime = ddi_get_lbolt64();
		DNLC_DIR_HASH(name, hash, namlen);
		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (dep != NULL) {
			/* cheap checks first, then the name bytes */
			if ((dep->de_hash == hash) &&
			    (namlen == dep->de_namelen) &&
			    bcmp(dep->de_name, name, namlen) == 0) {
				*handle = dep->de_handle;
				mutex_exit(&dcap->dca_lock);
				ncs.ncs_dir_hits.value.ui64++;
				return (DFOUND);
			}
			dep = dep->de_next;
		}
		/*
		 * Not in the cache: only a complete cache can assert
		 * the entry does not exist (DNOENT).
		 */
		if (dcp->dc_complete) {
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_misses.value.ui64++;
		return (DNOCACHE);
	}
}
1156 
1157 /*
1158  * Start a new directory cache. An estimate of the number of
 * entries is provided as a quick check to ensure the directory
1160  * is cacheable.
1161  */
dcret_t
dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
{
	dircache_t *dcp;

	/* too small a directory isn't worth caching */
	if (!dnlc_dir_enable ||
	    (num_entries < dnlc_dir_min_size)) {
		return (DNOCACHE);
	}

	if (num_entries > dnlc_dir_max_size) {
		return (DTOOBIG);
	}

	/* lock order: global chain lock, then the anchor lock */
	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);

	/*
	 * A previous attempt on this anchor failed for lack of memory;
	 * clear the marker and tell the caller.
	 */
	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
		dcap->dca_dircache = NULL;
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOMEM);
	}

	/*
	 * Check if there's currently a cache.
	 * This probably only occurs on a race.
	 */
	if (dcap->dca_dircache != NULL) {
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOCACHE);
	}

	/*
	 * Allocate the dircache struct, entry and free space hash tables.
	 * These tables are initially just one entry but dynamically resize
	 * when entries and free space are added or removed.
	 */
	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}

	dcp->dc_anchor = dcap; /* set back pointer to anchor */
	dcap->dca_dircache = dcp;

	/* add into head of global chain */
	dcp->dc_next = dc_head.dch_next;
	dcp->dc_prev = (dircache_t *)&dc_head;
	dcp->dc_next->dc_prev = dcp;
	dc_head.dch_next = dcp;

	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_cur_dirs.value.ui64++;
	ncs.ncs_dirs_cached.value.ui64++;
	return (DOK);
error:
	/* free whatever was allocated before the failure */
	if (dcp != NULL) {
		if (dcp->dc_namehash) {
			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
		}
		kmem_free(dcp, sizeof (dircache_t));
	}
	/*
	 * Must also kmem_free dcp->dc_freehash if more error cases are added
	 */
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_dir_start_nm.value.ui64++;
	return (DNOCACHE);
}
1242 
1243 /*
 * Add a directory entry to a partial or complete directory cache.
1245  */
dcret_t
dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
{
	dircache_t *dcp;
	dcentry_t **hp, *dep;
	int hash;
	uint_t capacity;
	uchar_t namlen;

	/*
	 * Allocate the dcentry struct, including the variable
	 * size name. Note, the null terminator is not copied.
	 *
	 * We do this outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	DNLC_DIR_HASH(name, hash, namlen);
	/* dcentry_t declares de_name[1], hence the "- 1 + namlen" */
	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
	if (dep == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This is done not to cover an error but purely for
		 * performance running a debug kernel.
		 * This random error only occurs in debug mode.
		 */
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * called with.
		 */
		dnlc_dir_reclaim(NULL);
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep == NULL) {
			/*
			 * still no memory, better delete this cache
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				/* mark so the next start returns DNOMEM */
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}
#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * If the total number of entries goes above the max
		 * then free this cache
		 */
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_entries++;
		/* grow the name hash table if it's getting crowded */
		capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
		if (dcp->dc_num_entries >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_nhash(dcp);
		}
		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];

		/*
		 * Initialise and chain in new entry
		 */
		dep->de_handle = handle;
		dep->de_hash = hash;
		/*
		 * Note de_namelen is a uchar_t to conserve space
		 * and alignment padding. The max length of any
		 * pathname component is defined as MAXNAMELEN
		 * which is 256 (including the terminating null).
		 * So provided this doesn't change, we don't include the null,
		 * we always use bcmp to compare strings, and we don't
		 * start storing full names, then we are ok.
		 * The space savings is worth it.
		 */
		dep->de_namelen = namlen;
		bcopy(name, dep->de_name, namlen);
		dep->de_next = *hp;
		*hp = dep;
		dcp->dc_actime = ddi_get_lbolt64();
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		/* cache vanished while we were allocating */
		mutex_exit(&dcap->dca_lock);
		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
		return (DNOCACHE);
	}
}
1358 
1359 /*
1360  * Add free space to a partial or complete directory cache.
1361  */
dcret_t
dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
{
	dircache_t *dcp;
	dcfree_t *dfp, **hp;
	uint_t capacity;

	/*
	 * We kmem_alloc outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
	if (dfp == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This random error only occurs in debug mode
		 */
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * called with.
		 */
		dnlc_dir_reclaim(NULL);
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp == NULL) {
			/*
			 * still no memory, better delete this cache
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				/* mark so the next start returns DNOMEM */
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}

#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/* free the whole cache if it has grown beyond the max */
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_cache_free(dnlc_dir_space_cache, dfp);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_free++;
		/* grow the free space hash table if it's getting crowded */
		capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
		if (dcp->dc_num_free >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_fhash(dcp);
		}
		/*
		 * Initialise and chain a new entry
		 */
		dfp->df_handle = handle;
		dfp->df_len = len;
		dcp->dc_actime = ddi_get_lbolt64();
		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
		dfp->df_next = *hp;
		*hp = dfp;
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		/* cache vanished while we were allocating */
		mutex_exit(&dcap->dca_lock);
		kmem_cache_free(dnlc_dir_space_cache, dfp);
		return (DNOCACHE);
	}
}
1450 
1451 /*
1452  * Mark a directory cache as complete.
1453  */
1454 void
1455 dnlc_dir_complete(dcanchor_t *dcap)
1456 {
1457         dircache_t *dcp;
1458 
1459         mutex_enter(&dcap->dca_lock);
1460         dcp = (dircache_t *)dcap->dca_dircache;
1461         if (VALID_DIR_CACHE(dcp)) {
1462                 dcp->dc_complete = B_TRUE;
1463         }
1464         mutex_exit(&dcap->dca_lock);
1465 }
1466 
1467 /*
1468  * Internal routine to delete a partial or full directory cache.
1469  * No additional locking needed.
1470  */
1471 static void
1472 dnlc_dir_abort(dircache_t *dcp)
1473 {
1474         dcentry_t *dep, *nhp;
1475         dcfree_t *fep, *fhp;
1476         uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1477         uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1478         uint_t i;
1479 
1480         /*
1481          * Free up the cached name entries and hash table
1482          */
1483         for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1484                 nhp = dcp->dc_namehash[i];
1485                 while (nhp != NULL) { /* for each chained entry */
1486                         dep = nhp->de_next;
1487                         kmem_free(nhp, sizeof (dcentry_t) - 1 +
1488                             nhp->de_namelen);
1489                         nhp = dep;
1490                 }
1491         }
1492         kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1493 
1494         /*
1495          * Free up the free space entries and hash table
1496          */
1497         for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1498                 fhp = dcp->dc_freehash[i];
1499                 while (fhp != NULL) { /* for each chained entry */
1500                         fep = fhp->df_next;
1501                         kmem_cache_free(dnlc_dir_space_cache, fhp);
1502                         fhp = fep;
1503                 }
1504         }
1505         kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1506 
1507         /*
1508          * Finally free the directory cache structure itself
1509          */
1510         ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1511             dcp->dc_num_free);
1512         kmem_free(dcp, sizeof (dircache_t));
1513         ncs.ncs_cur_dirs.value.ui64--;
1514 }
1515 
1516 /*
1517  * Remove a partial or complete directory cache
1518  */
1519 void
1520 dnlc_dir_purge(dcanchor_t *dcap)
1521 {
1522         dircache_t *dcp;
1523 
1524         mutex_enter(&dc_head.dch_lock);
1525         mutex_enter(&dcap->dca_lock);
1526         dcp = (dircache_t *)dcap->dca_dircache;
1527         if (!VALID_DIR_CACHE(dcp)) {
1528                 mutex_exit(&dcap->dca_lock);
1529                 mutex_exit(&dc_head.dch_lock);
1530                 return;
1531         }
1532         dcap->dca_dircache = NULL;
1533         /*
1534          * Unchain from global list
1535          */
1536         dcp->dc_prev->dc_next = dcp->dc_next;
1537         dcp->dc_next->dc_prev = dcp->dc_prev;
1538         mutex_exit(&dcap->dca_lock);
1539         mutex_exit(&dc_head.dch_lock);
1540         dnlc_dir_abort(dcp);
1541 }
1542 
1543 /*
1544  * Remove an entry from a complete or partial directory cache.
1545  * Return the handle if it's non null.
1546  */
dcret_t
dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
{
	dircache_t *dcp;
	dcentry_t **prevpp, *te;
	uint_t capacity;
	int hash;
	int ret;
	uchar_t namlen;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/* touch the access time for the LRU reclaim policy */
		dcp->dc_actime = ddi_get_lbolt64();
		/* shrink the name hash table if it's become sparse */
		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_nhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_entries <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_nhash(dcp);
			}
		}
		DNLC_DIR_HASH(name, hash, namlen);
		/* walk the chain via a pointer-to-link so unlinking is easy */
		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (*prevpp != NULL) {
			if (((*prevpp)->de_hash == hash) &&
			    (namlen == (*prevpp)->de_namelen) &&
			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
				if (handlep != NULL) {
					*handlep = (*prevpp)->de_handle;
				}
				te = *prevpp;
				*prevpp = (*prevpp)->de_next;
				kmem_free(te, sizeof (dcentry_t) - 1 +
				    te->de_namelen);

				/*
				 * If the total number of entries
				 * falls below half the minimum number
				 * of entries then free this cache.
				 */
				if (--dcp->dc_num_entries <
				    (dnlc_dir_min_size >> 1)) {
					mutex_exit(&dcap->dca_lock);
					dnlc_dir_purge(dcap);
				} else {
					mutex_exit(&dcap->dca_lock);
				}
				ncs.ncs_dir_num_ents.value.ui64--;
				return (DFOUND);
			}
			prevpp = &((*prevpp)->de_next);
		}
		/*
		 * Not found: only a complete cache can assert the
		 * entry does not exist (DNOENT).
		 */
		if (dcp->dc_complete) {
			ncs.ncs_dir_reme_fai.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}
1617 
1618 
1619 /*
1620  * Remove free space of at least the given length from a complete
1621  * or partial directory cache.
1622  */
1623 dcret_t
1624 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1625 {
1626         dircache_t *dcp;
1627         dcfree_t **prevpp, *tfp;
1628         uint_t fhtsize; /* free hash table size */
1629         uint_t i;
1630         uint_t capacity;
1631         int ret;
1632 
1633         if (!dnlc_dir_enable) {
1634                 return (DNOCACHE);
1635         }
1636 
1637         mutex_enter(&dcap->dca_lock);
1638         dcp = (dircache_t *)dcap->dca_dircache;
1639         if (VALID_DIR_CACHE(dcp)) {
1640                 dcp->dc_actime = ddi_get_lbolt64();
1641                 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1642                         capacity = (dcp->dc_fhash_mask + 1) <<
1643                             dnlc_dir_hash_size_shift;
1644                         if (dcp->dc_num_free <=
1645                             (capacity >> dnlc_dir_hash_resize_shift)) {
1646                                 dnlc_dir_adjust_fhash(dcp);
1647                         }
1648                 }
1649                 /*
1650                  * Search for an entry of the appropriate size
1651                  * on a first fit basis.
1652                  */
1653                 fhtsize = dcp->dc_fhash_mask + 1;
1654                 for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1655                         prevpp = &(dcp->dc_freehash[i]);
1656                         while (*prevpp != NULL) {
1657                                 if ((*prevpp)->df_len >= len) {
1658                                         *handlep = (*prevpp)->df_handle;
1659                                         tfp = *prevpp;
1660                                         *prevpp = (*prevpp)->df_next;
1661                                         dcp->dc_num_free--;
1662                                         mutex_exit(&dcap->dca_lock);
1663                                         kmem_cache_free(dnlc_dir_space_cache,
1664                                             tfp);
1665                                         ncs.ncs_dir_num_ents.value.ui64--;
1666                                         return (DFOUND);
1667                                 }
1668                                 prevpp = &((*prevpp)->df_next);
1669                         }
1670                 }
1671                 if (dcp->dc_complete) {
1672                         ret = DNOENT;
1673                 } else {
1674                         ret = DNOCACHE;
1675                 }
1676                 mutex_exit(&dcap->dca_lock);
1677                 return (ret);
1678         } else {
1679                 mutex_exit(&dcap->dca_lock);
1680                 return (DNOCACHE);
1681         }
1682 }
1683 
1684 /*
1685  * Remove free space with the given handle from a complete or partial
1686  * directory cache.
1687  */
1688 dcret_t
1689 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1690 {
1691         dircache_t *dcp;
1692         dcfree_t **prevpp, *tfp;
1693         uint_t capacity;
1694         int ret;
1695 
1696         if (!dnlc_dir_enable) {
1697                 return (DNOCACHE);
1698         }
1699 
1700         mutex_enter(&dcap->dca_lock);
1701         dcp = (dircache_t *)dcap->dca_dircache;
1702         if (VALID_DIR_CACHE(dcp)) {
1703                 dcp->dc_actime = ddi_get_lbolt64();
1704                 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1705                         capacity = (dcp->dc_fhash_mask + 1) <<
1706                             dnlc_dir_hash_size_shift;
1707                         if (dcp->dc_num_free <=
1708                             (capacity >> dnlc_dir_hash_resize_shift)) {
1709                                 dnlc_dir_adjust_fhash(dcp);
1710                         }
1711                 }
1712 
1713                 /*
1714                  * search for the exact entry
1715                  */
1716                 prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1717                 while (*prevpp != NULL) {
1718                         if ((*prevpp)->df_handle == handle) {
1719                                 tfp = *prevpp;
1720                                 *prevpp = (*prevpp)->df_next;
1721                                 dcp->dc_num_free--;
1722                                 mutex_exit(&dcap->dca_lock);
1723                                 kmem_cache_free(dnlc_dir_space_cache, tfp);
1724                                 ncs.ncs_dir_num_ents.value.ui64--;
1725                                 return (DFOUND);
1726                         }
1727                         prevpp = &((*prevpp)->df_next);
1728                 }
1729                 if (dcp->dc_complete) {
1730                         ncs.ncs_dir_rems_fai.value.ui64++;
1731                         ret = DNOENT;
1732                 } else {
1733                         ret = DNOCACHE;
1734                 }
1735                 mutex_exit(&dcap->dca_lock);
1736                 return (ret);
1737         } else {
1738                 mutex_exit(&dcap->dca_lock);
1739                 return (DNOCACHE);
1740         }
1741 }
1742 
1743 /*
1744  * Update the handle of an directory cache entry.
1745  */
1746 dcret_t
1747 dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
1748 {
1749         dircache_t *dcp;
1750         dcentry_t *dep;
1751         int hash;
1752         int ret;
1753         uchar_t namlen;
1754 
1755         if (!dnlc_dir_enable) {
1756                 return (DNOCACHE);
1757         }
1758 
1759         mutex_enter(&dcap->dca_lock);
1760         dcp = (dircache_t *)dcap->dca_dircache;
1761         if (VALID_DIR_CACHE(dcp)) {
1762                 dcp->dc_actime = ddi_get_lbolt64();
1763                 DNLC_DIR_HASH(name, hash, namlen);
1764                 dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1765                 while (dep != NULL) {
1766                         if ((dep->de_hash == hash) &&
1767                             (namlen == dep->de_namelen) &&
1768                             bcmp(dep->de_name, name, namlen) == 0) {
1769                                 dep->de_handle = handle;
1770                                 mutex_exit(&dcap->dca_lock);
1771                                 return (DFOUND);
1772                         }
1773                         dep = dep->de_next;
1774                 }
1775                 if (dcp->dc_complete) {
1776                         ncs.ncs_dir_upd_fail.value.ui64++;
1777                         ret = DNOENT;
1778                 } else {
1779                         ret = DNOCACHE;
1780                 }
1781                 mutex_exit(&dcap->dca_lock);
1782                 return (ret);
1783         } else {
1784                 mutex_exit(&dcap->dca_lock);
1785                 return (DNOCACHE);
1786         }
1787 }
1788 
1789 void
1790 dnlc_dir_fini(dcanchor_t *dcap)
1791 {
1792         dircache_t *dcp;
1793 
1794         mutex_enter(&dc_head.dch_lock);
1795         mutex_enter(&dcap->dca_lock);
1796         dcp = (dircache_t *)dcap->dca_dircache;
1797         if (VALID_DIR_CACHE(dcp)) {
1798                 /*
1799                  * Unchain from global list
1800                  */
1801                 ncs.ncs_dir_finipurg.value.ui64++;
1802                 dcp->dc_prev->dc_next = dcp->dc_next;
1803                 dcp->dc_next->dc_prev = dcp->dc_prev;
1804         } else {
1805                 dcp = NULL;
1806         }
1807         dcap->dca_dircache = NULL;
1808         mutex_exit(&dcap->dca_lock);
1809         mutex_exit(&dc_head.dch_lock);
1810         mutex_destroy(&dcap->dca_lock);
1811         if (dcp) {
1812                 dnlc_dir_abort(dcp);
1813         }
1814 }
1815 
1816 /*
1817  * Reclaim callback for dnlc directory caching.
1818  * Invoked by the kernel memory allocator when memory gets tight.
1819  * This is a pretty serious condition and can lead easily lead to system
1820  * hangs if not enough space is returned.
1821  *
1822  * Deciding which directory (or directories) to purge is tricky.
1823  * Purging everything is an overkill, but purging just the oldest used
1824  * was found to lead to hangs. The largest cached directories use the
1825  * most memory, but take the most effort to rebuild, whereas the smaller
1826  * ones have little value and give back little space. So what to do?
1827  *
1828  * The current policy is to continue purging the oldest used directories
1829  * until at least dnlc_dir_min_reclaim directory entries have been purged.
1830  */
1831 /*ARGSUSED*/
1832 static void
1833 dnlc_dir_reclaim(void *unused)
1834 {
1835         dircache_t *dcp, *oldest;
1836         uint_t dirent_cnt = 0;
1837 
1838         mutex_enter(&dc_head.dch_lock);
1839         while (dirent_cnt < dnlc_dir_min_reclaim) {
1840                 dcp = dc_head.dch_next;
1841                 oldest = NULL;
1842                 while (dcp != (dircache_t *)&dc_head) {
1843                         if (oldest == NULL) {
1844                                 oldest = dcp;
1845                         } else {
1846                                 if (dcp->dc_actime < oldest->dc_actime) {
1847                                         oldest = dcp;
1848                                 }
1849                         }
1850                         dcp = dcp->dc_next;
1851                 }
1852                 if (oldest == NULL) {
1853                         /* nothing to delete */
1854                         mutex_exit(&dc_head.dch_lock);
1855                         return;
1856                 }
1857                 /*
1858                  * remove from directory chain and purge
1859                  */
1860                 oldest->dc_prev->dc_next = oldest->dc_next;
1861                 oldest->dc_next->dc_prev = oldest->dc_prev;
1862                 mutex_enter(&oldest->dc_anchor->dca_lock);
1863                 /*
1864                  * If this was the last entry then it must be too large.
1865                  * Mark it as such by saving a special dircache_t
1866                  * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1867                  * will be presented to the caller of dnlc_dir_start()
1868                  */
1869                 if (oldest->dc_next == oldest->dc_prev) {
1870                         oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1871                         ncs.ncs_dir_rec_last.value.ui64++;
1872                 } else {
1873                         oldest->dc_anchor->dca_dircache = NULL;
1874                         ncs.ncs_dir_recl_any.value.ui64++;
1875                 }
1876                 mutex_exit(&oldest->dc_anchor->dca_lock);
1877                 dirent_cnt += oldest->dc_num_entries;
1878                 dnlc_dir_abort(oldest);
1879         }
1880         mutex_exit(&dc_head.dch_lock);
1881 }
1882 
1883 /*
1884  * Dynamically grow or shrink the size of the name hash table
1885  */
1886 static void
1887 dnlc_dir_adjust_nhash(dircache_t *dcp)
1888 {
1889         dcentry_t **newhash, *dep, **nhp, *tep;
1890         uint_t newsize;
1891         uint_t oldsize;
1892         uint_t newsizemask;
1893         int i;
1894 
1895         /*
1896          * Allocate new hash table
1897          */
1898         newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1899         newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1900         if (newhash == NULL) {
1901                 /*
1902                  * System is short on memory just return
1903                  * Note, the old hash table is still usable.
1904                  * This return is unlikely to repeatedy occur, because
1905                  * either some other directory caches will be reclaimed
1906                  * due to memory shortage, thus freeing memory, or this
1907                  * directory cahe will be reclaimed.
1908                  */
1909                 return;
1910         }
1911         oldsize = dcp->dc_nhash_mask + 1;
1912         dcp->dc_nhash_mask = newsizemask = newsize - 1;
1913 
1914         /*
1915          * Move entries from the old table to the new
1916          */
1917         for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1918                 dep = dcp->dc_namehash[i];
1919                 while (dep != NULL) { /* for each chained entry */
1920                         tep = dep;
1921                         dep = dep->de_next;
1922                         nhp = &newhash[tep->de_hash & newsizemask];
1923                         tep->de_next = *nhp;
1924                         *nhp = tep;
1925                 }
1926         }
1927 
1928         /*
1929          * delete old hash table and set new one in place
1930          */
1931         kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1932         dcp->dc_namehash = newhash;
1933 }
1934 
1935 /*
1936  * Dynamically grow or shrink the size of the free space hash table
1937  */
1938 static void
1939 dnlc_dir_adjust_fhash(dircache_t *dcp)
1940 {
1941         dcfree_t **newhash, *dfp, **nhp, *tfp;
1942         uint_t newsize;
1943         uint_t oldsize;
1944         int i;
1945 
1946         /*
1947          * Allocate new hash table
1948          */
1949         newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1950         newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1951         if (newhash == NULL) {
1952                 /*
1953                  * System is short on memory just return
1954                  * Note, the old hash table is still usable.
1955                  * This return is unlikely to repeatedy occur, because
1956                  * either some other directory caches will be reclaimed
1957                  * due to memory shortage, thus freeing memory, or this
1958                  * directory cahe will be reclaimed.
1959                  */
1960                 return;
1961         }
1962         oldsize = dcp->dc_fhash_mask + 1;
1963         dcp->dc_fhash_mask = newsize - 1;
1964 
1965         /*
1966          * Move entries from the old table to the new
1967          */
1968         for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1969                 dfp = dcp->dc_freehash[i];
1970                 while (dfp != NULL) { /* for each chained entry */
1971                         tfp = dfp;
1972                         dfp = dfp->df_next;
1973                         nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1974                         tfp->df_next = *nhp;
1975                         *nhp = tfp;
1976                 }
1977         }
1978 
1979         /*
1980          * delete old hash table and set new one in place
1981          */
1982         kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1983         dcp->dc_freehash = newhash;
1984 }