re #13613 rb4516 Tunables needs volatile keyword
--- old/usr/src/uts/common/fs/dnlc.c
+++ new/usr/src/uts/common/fs/dnlc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
23 24 * Copyright (c) 2015, Joyent, Inc.
24 25 * Copyright (c) 2017 by Delphix. All rights reserved.
25 26 */
26 27
27 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 29 /* All Rights Reserved */
29 30
30 31 /*
31 32 * University Copyright- Copyright (c) 1982, 1986, 1988
32 33 * The Regents of the University of California
33 34 * All Rights Reserved
34 35 *
35 36 * University Acknowledgment- Portions of this document are derived from
36 37 * software developed by the University of California, Berkeley, and its
37 38 * contributors.
38 39 */
39 40
40 41 #include <sys/types.h>
41 42 #include <sys/systm.h>
42 43 #include <sys/param.h>
43 44 #include <sys/t_lock.h>
44 45 #include <sys/systm.h>
45 46 #include <sys/vfs.h>
46 47 #include <sys/vnode.h>
47 48 #include <sys/dnlc.h>
48 49 #include <sys/kmem.h>
49 50 #include <sys/cmn_err.h>
50 51 #include <sys/vtrace.h>
51 52 #include <sys/bitmap.h>
52 53 #include <sys/var.h>
53 54 #include <sys/sysmacros.h>
54 55 #include <sys/kstat.h>
55 56 #include <sys/atomic.h>
56 57 #include <sys/taskq.h>
57 58
58 59 /*
59 60 * Directory name lookup cache.
60 61 * Based on code originally done by Robert Elz at Melbourne.
61 62 *
62 63 * Names found by directory scans are retained in a cache
63 64  * for future reference. Each hash chain is ordered by LRU.
64 65  * The cache is indexed by a hash value obtained from (vp, name),
65 66 * where the vp refers to the directory containing the name.
66 67 */
67 68
68 69 /*
69 70 * We want to be able to identify files that are referenced only by the DNLC.
70 71 * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
71 72 * since multiple DNLC references should only be counted once in v_count. The
72 73 * VN_HOLD macro itself is aliased to VN_HOLD_CALLER in this file to help
73 74 * differentiate the behaviors. (Unfortunately it is not possible to #undef
74 75 * VN_HOLD and retain VN_HOLD_CALLER. Ideally a Makefile rule would grep
75 76 * uncommented C tokens to check that VN_HOLD is referenced only once in this
76 77 * file, to define VN_HOLD_CALLER.)
77 78 */
78 79 #define VN_HOLD_CALLER VN_HOLD
79 80 #define VN_HOLD_DNLC(vp) { \
80 81 mutex_enter(&(vp)->v_lock); \
81 82 if ((vp)->v_count_dnlc == 0) { \
82 83 VN_HOLD_LOCKED(vp); \
83 84 } \
84 85 (vp)->v_count_dnlc++; \
85 86 mutex_exit(&(vp)->v_lock); \
86 87 }
87 88 #define VN_RELE_DNLC(vp) { \
88 89 vn_rele_dnlc(vp); \
89 90 }
90 91
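/*
 * A minimal userland model of the dual-count scheme above (a sketch,
 * not kernel code): all DNLC references together contribute exactly
 * one hold to v_count, with the DNLC's own tally kept in v_count_dnlc.
 * Field names mirror the kernel vnode; locking is elided.
 */
#include <assert.h>

struct toy_vnode {
	int v_count;		/* total holds; the DNLC counts once */
	int v_count_dnlc;	/* holds owned by the DNLC */
};

static void
toy_hold_dnlc(struct toy_vnode *vp)
{
	if (vp->v_count_dnlc == 0)
		vp->v_count++;		/* first DNLC reference only */
	vp->v_count_dnlc++;
}

static void
toy_rele_dnlc(struct toy_vnode *vp)
{
	assert(vp->v_count_dnlc > 0);
	if (--vp->v_count_dnlc == 0)
		vp->v_count--;		/* last DNLC reference dropped */
}
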
91 92 /*
92 93 * Tunable nc_hashavelen is the average length desired for this chain, from
93 94 * which the size of the nc_hash table is derived at create time.
94 95 */
95 96 #define NC_HASHAVELEN_DEFAULT 4
96 97 int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
97 98
98 99 /*
99 100 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
100 101 * depth exceeds this value, we move the looked-up entry to the front of
101 102 * its hash chain. The idea is to make sure that the most frequently
102 103 * accessed entries are found most quickly (by keeping them near the
103 104 * front of their hash chains).
104 105 */
105 106 #define NC_MOVETOFRONT 2
106 107
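/*
 * Sketch of the move-to-front policy on a plain singly linked list
 * (illustrative userland code; the kernel code below does the same on
 * a circular doubly linked hash chain under the chain's mutex): once a
 * lookup walks deeper than the threshold, the found node is unlinked
 * and reinserted at the head so hot entries stay near the front.
 */
#include <stddef.h>

struct toy_node {
	struct toy_node *next;
	int key;
};

static struct toy_node *
toy_lookup_mtf(struct toy_node **headp, int key, int threshold)
{
	struct toy_node **pp;
	int depth = 1;

	for (pp = headp; *pp != NULL; pp = &(*pp)->next, depth++) {
		if ((*pp)->key == key) {
			struct toy_node *np = *pp;

			if (depth > threshold) {
				*pp = np->next;		/* unlink */
				np->next = *headp;	/* push to front */
				*headp = np;
			}
			return (np);
		}
	}
	return (NULL);
}
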
107 108 /*
108 109 *
109 110 * DNLC_MAX_RELE is used to size an array on the stack when releasing
110 111 * vnodes. This array is used rather than calling VN_RELE() inline because
111 112 * all dnlc locks must be dropped by that time in order to avoid a
112 113 * possible deadlock. This deadlock occurs when the dnlc holds the last
113 114 * reference to the vnode and so the VOP_INACTIVE vector is called which
114 115 * can in turn call back into the dnlc. A global array was used but had
115 116 * many problems:
116 117  * 1) It had no real upper bound on the array size, as entries
117 118  * could be added after the purge had started.
118 119  * 2) The locking scheme caused hangs.
119 120  * 3) It caused serialisation on the global lock.
120 121 * 4) The array was often unnecessarily huge.
121 122 *
122 123 * Note the current value 8 allows up to 4 cache entries (to be purged
123 124 * from each hash chain), before having to cycle around and retry.
124 125 * This ought to be ample given that nc_hashavelen is typically very small.
125 126 */
126 127 #define DNLC_MAX_RELE 8 /* must be even */
127 128
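/*
 * Sketch of the deferred-release pattern described above (toy_* names
 * are illustrative, not kernel interfaces): object pointers are
 * collected into a small on-stack array while the chain lock is held,
 * and the releases, which may re-enter the cache, happen only after
 * the lock is dropped; the chain is then revisited if entries remain.
 */
#include <pthread.h>
#include <stddef.h>

#define	TOY_MAX_RELE	8

struct toy_obj {
	struct toy_obj *next;
};

struct toy_chain {
	pthread_mutex_t lock;
	struct toy_obj *head;
};

extern void toy_release(struct toy_obj *);	/* may re-enter the cache */

static void
toy_purge_chain(struct toy_chain *chain)
{
	struct toy_obj *rele[TOY_MAX_RELE];
	int n;

	do {
		n = 0;
		pthread_mutex_lock(&chain->lock);
		while (n < TOY_MAX_RELE && chain->head != NULL) {
			struct toy_obj *op = chain->head;

			chain->head = op->next;
			rele[n++] = op;
		}
		pthread_mutex_unlock(&chain->lock);

		/* No locks held: safe to call back into the cache. */
		while (n > 0)
			toy_release(rele[--n]);
	} while (chain->head != NULL);	/* unlocked recheck, as below */
}
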
128 129 /*
129 130 * Hash table of name cache entries for fast lookup, dynamically
130 131 * allocated at startup.
131 132 */
132 133 nc_hash_t *nc_hash;
133 134
134 135 /*
135 136 * Rotors. Used to select entries on a round-robin basis.
136 137 */
137 138 static nc_hash_t *dnlc_purge_fs1_rotor;
138 139 static nc_hash_t *dnlc_free_rotor;
139 140
140 141 /*
141 142 * # of dnlc entries (uninitialized)
142 143 *
143 144 * the initial value was chosen as being
144 145 * a random string of bits, probably not
145 146 * normally chosen by a systems administrator
146 147 */
147 -int ncsize = -1;
148 +volatile int ncsize = -1;
148 149 volatile uint32_t dnlc_nentries = 0; /* current num of name cache entries */
149 150 static int nc_hashsz; /* size of hash table */
150 151 static int nc_hashmask; /* size of hash table minus 1 */
151 152
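/*
 * Why the volatile qualifier matters here (the point of this change),
 * shown as a minimal userland sketch with an illustrative name.  A
 * tunable such as ncsize can be changed from outside the compiled
 * code, e.g. with "set ncsize=..." in /etc/system or a live write via
 * mdb(1).  Without volatile, the compiler is free to cache the value
 * in a register, so already-running code could keep using a stale
 * copy indefinitely.
 */
volatile int toy_tunable = 100;

static void
toy_wait_for_admin(void)
{
	/*
	 * With volatile, each iteration re-reads toy_tunable from
	 * memory; without it, the load may be hoisted out of the loop
	 * and the function could spin on a register copy forever.
	 */
	while (toy_tunable != 0)
		continue;
}
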
152 153 /*
153 154  * The dnlc_reduce_cache() taskq is activated when there are
154 155  * ncsize name cache entries and, if no parameter is provided, it reduces
155 156  * the size down to dnlc_nentries_low_water, which by default is one
156 157  * hundredth less (i.e., 99% of ncsize).
157 158 *
158 159 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
159 160 * the size down based on ncsize_onepercent - where ncsize_onepercent
160 161 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
161 162 * the size below 3% of ncsize (ncsize_min_percent).
162 163 */
163 164 #define DNLC_LOW_WATER_DIVISOR_DEFAULT 100
164 165 uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
165 166 uint_t dnlc_nentries_low_water;
166 167 int dnlc_reduce_idle = 1; /* no locking needed */
167 168 uint_t ncsize_onepercent;
168 169 uint_t ncsize_min_percent;
169 170
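/*
 * Worked example of the sizing arithmetic above (a sketch; the values
 * are illustrative).  With the default divisor of 100, a user-set
 * ncsize of 100000 yields a low water mark of 99000 (99%), a
 * one-percent step of 1000, a 3% floor of 3000, and a hard ceiling of
 * 200000 entries.
 */
#include <stdio.h>

int
main(void)
{
	int ncsize = 100000;			/* user-specified */
	unsigned divisor = 100;			/* dnlc_low_water_divisor */
	unsigned low_water = ncsize - ncsize / divisor;
	unsigned onepercent = ncsize / 100;	/* ncsize_onepercent */

	printf("low water %u, 1%% step %u, floor %u, ceiling %d\n",
	    low_water, onepercent, onepercent * 3, ncsize * 2);
	return (0);
}
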
170 171 /*
171 172 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
172 173 * then this means the dnlc_reduce_cache() taskq is failing to
173 174 * keep up. In this case we refuse to add new entries to the dnlc
174 175 * until the taskq catches up.
175 176 */
176 177 uint_t dnlc_max_nentries; /* twice ncsize */
177 178 uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
178 179
179 180 /*
180 181 * Tunable to define when we should just remove items from
181 182 * the end of the chain.
182 183 */
183 184 #define DNLC_LONG_CHAIN 8
184 185 uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
185 186
186 187 /*
187 188 * ncstats has been deprecated, due to the integer size of the counters
188 189 * which can easily overflow in the dnlc.
189 190  * It is maintained (at some expense) for compatibility.
190 191 * The preferred interface is the kstat accessible nc_stats below.
191 192 */
192 193 struct ncstats ncstats;
193 194
194 195 struct nc_stats ncs = {
195 196 { "hits", KSTAT_DATA_UINT64 },
196 197 { "misses", KSTAT_DATA_UINT64 },
197 198 { "negative_cache_hits", KSTAT_DATA_UINT64 },
198 199 { "enters", KSTAT_DATA_UINT64 },
199 200 { "double_enters", KSTAT_DATA_UINT64 },
200 201 { "purge_total_entries", KSTAT_DATA_UINT64 },
201 202 { "purge_all", KSTAT_DATA_UINT64 },
202 203 { "purge_vp", KSTAT_DATA_UINT64 },
203 204 { "purge_vfs", KSTAT_DATA_UINT64 },
204 205 { "purge_fs1", KSTAT_DATA_UINT64 },
205 206 { "pick_free", KSTAT_DATA_UINT64 },
206 207 { "pick_heuristic", KSTAT_DATA_UINT64 },
207 208 { "pick_last", KSTAT_DATA_UINT64 },
208 209
209 210 /* directory caching stats */
210 211
211 212 { "dir_hits", KSTAT_DATA_UINT64 },
212 213 { "dir_misses", KSTAT_DATA_UINT64 },
213 214 { "dir_cached_current", KSTAT_DATA_UINT64 },
214 215 { "dir_entries_cached_current", KSTAT_DATA_UINT64 },
215 216 { "dir_cached_total", KSTAT_DATA_UINT64 },
216 217 { "dir_start_no_memory", KSTAT_DATA_UINT64 },
217 218 { "dir_add_no_memory", KSTAT_DATA_UINT64 },
218 219 { "dir_add_abort", KSTAT_DATA_UINT64 },
219 220 { "dir_add_max", KSTAT_DATA_UINT64 },
220 221 { "dir_remove_entry_fail", KSTAT_DATA_UINT64 },
221 222 { "dir_remove_space_fail", KSTAT_DATA_UINT64 },
222 223 { "dir_update_fail", KSTAT_DATA_UINT64 },
223 224 { "dir_fini_purge", KSTAT_DATA_UINT64 },
224 225 { "dir_reclaim_last", KSTAT_DATA_UINT64 },
225 226 { "dir_reclaim_any", KSTAT_DATA_UINT64 },
226 227 };
227 228
228 229 static int doingcache = 1;
229 230
230 231 vnode_t negative_cache_vnode;
231 232
232 233 /*
233 234 * Insert entry at the front of the queue
234 235 */
235 236 #define nc_inshash(ncp, hp) \
236 237 { \
237 238 (ncp)->hash_next = (hp)->hash_next; \
238 239 (ncp)->hash_prev = (ncache_t *)(hp); \
239 240 (hp)->hash_next->hash_prev = (ncp); \
240 241 (hp)->hash_next = (ncp); \
241 242 }
242 243
243 244 /*
244 245 * Remove entry from hash queue
245 246 */
246 247 #define nc_rmhash(ncp) \
247 248 { \
248 249 (ncp)->hash_prev->hash_next = (ncp)->hash_next; \
249 250 (ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
250 251 (ncp)->hash_prev = NULL; \
251 252 (ncp)->hash_next = NULL; \
252 253 }
253 254
254 255 /*
255 256 * Free an entry.
256 257 */
257 258 #define dnlc_free(ncp) \
258 259 { \
259 260 kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
260 261 atomic_dec_32(&dnlc_nentries); \
261 262 }
262 263
263 264
264 265 /*
265 266 * Cached directory info.
266 267 * ======================
267 268 */
268 269
269 270 /*
270 271 * Cached directory free space hash function.
271 272 * Needs the free space handle and the dcp to get the hash table size
272 273 * Returns the hash index.
273 274 */
274 275 #define DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
275 276
276 277 /*
277 278 * Cached directory name entry hash function.
278 279 * Uses the name and returns in the input arguments the hash and the name
279 280 * length.
280 281 */
281 282 #define DNLC_DIR_HASH(name, hash, namelen) \
282 283 { \
283 284 char Xc; \
284 285 const char *Xcp; \
285 286 hash = *name; \
286 287 for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
287 288 hash = (hash << 4) + hash + Xc; \
288 289 ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1)); \
289 290 namelen = Xcp - (name); \
290 291 }
291 292
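/*
 * The same name hash written as a plain function for clarity (a
 * userland sketch): each step computes hash = hash * 17 + c, since
 * (hash << 4) + hash == hash * 17, and the name length falls out of
 * the scan for free.  Assumes a non-empty, null-terminated name, as
 * the macro does.
 */
static int
toy_dir_hash(const char *name, unsigned char *namelenp)
{
	const char *cp;
	int hash = *name;
	char c;

	for (cp = name + 1; (c = *cp) != '\0'; cp++)
		hash = (hash << 4) + hash + c;
	*namelenp = (unsigned char)(cp - name);
	return (hash);
}
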
292 293 /* special dircache_t pointer to indicate error should be returned */
293 294 /*
294 295 * The anchor directory cache pointer can contain 3 types of values,
295 296 * 1) NULL: No directory cache
296 297  * 2) DC_RET_LOW_MEM (1): There was a directory cache that was found to
297 298  * be too big, or a memory shortage occurred. This value remains in the
298 299  * pointer until a dnlc_dir_start(), which returns a DNOMEM error.
299 300  * This is kludgy but efficient and only visible in this source file.
300 301 * 3) A valid cache pointer.
301 302 */
302 303 #define DC_RET_LOW_MEM (dircache_t *)1
303 304 #define VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
304 305
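/*
 * Sketch of the three-state anchor encoding described above, using the
 * same trick of a tiny non-NULL sentinel: NULL means "no cache", the
 * address value 1 means "gave up for lack of memory", and anything
 * else is a real cache pointer.
 */
struct toy_cache;

#define	TOY_LOW_MEM	((struct toy_cache *)1)
#define	TOY_VALID(p)	((p) != NULL && (p) != TOY_LOW_MEM)
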
305 306 /* Tunables */
306 -uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
307 -uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
308 -uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
307 +volatile uint_t dnlc_dir_enable = 1; /* disable caching directories by */
308 + /* setting to 0 */
309 +volatile uint_t dnlc_dir_min_size = 40; /* min no of directory entries before */
310 + /* caching */
311 +volatile uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
309 312 uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
310 313 uint_t dnlc_dir_min_reclaim = 350000; /* approx 1MB of dcentrys */
311 314 /*
312 315 * dnlc_dir_hash_resize_shift determines when the hash tables
313 316 * get re-adjusted due to growth or shrinkage
314 317 * - currently 2 indicating that there can be at most 4
315 318 * times or at least one quarter the number of entries
316 319 * before hash table readjustment. Note that with
317 320 * dnlc_dir_hash_size_shift above set at 3 this would
318 321 * mean readjustment would occur if the average number
319 322 * of entries went above 32 or below 2
320 323 */
321 324 uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
322 325
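/*
 * Worked example of the resize thresholds above (a sketch; values are
 * illustrative).  For a table of 4 buckets (nhash_mask + 1 == 4) with
 * dnlc_dir_hash_size_shift == 3 and dnlc_dir_hash_resize_shift == 2:
 */
#include <stdio.h>

int
main(void)
{
	unsigned buckets = 4;
	unsigned size_shift = 3;	/* dnlc_dir_hash_size_shift */
	unsigned resize_shift = 2;	/* dnlc_dir_hash_resize_shift */
	unsigned capacity = buckets << size_shift;	/* 32 */

	/* Grows at 128 entries (avg 32/bucket); shrinks at 8 (avg 2). */
	printf("grow at >= %u (avg %u/bucket), shrink at <= %u (avg %u)\n",
	    capacity << resize_shift, (capacity << resize_shift) / buckets,
	    capacity >> resize_shift, (capacity >> resize_shift) / buckets);
	return (0);
}
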
323 326 static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
324 327 static dchead_t dc_head; /* anchor of cached directories */
325 328
326 329 /* Prototypes */
327 330 static ncache_t *dnlc_get(uchar_t namlen);
328 331 static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
329 332 int hash);
330 333 static void dnlc_dir_reclaim(void *unused);
331 334 static void dnlc_dir_abort(dircache_t *dcp);
332 335 static void dnlc_dir_adjust_fhash(dircache_t *dcp);
333 336 static void dnlc_dir_adjust_nhash(dircache_t *dcp);
334 337 static void do_dnlc_reduce_cache(void *);
335 338
336 339
337 340 /*
338 341 * Initialize the directory cache.
339 342 */
340 343 void
341 344 dnlc_init()
342 345 {
343 346 nc_hash_t *hp;
344 347 kstat_t *ksp;
345 348 int i;
346 349
347 350 /*
348 351 * Set up the size of the dnlc (ncsize) and its low water mark.
349 352 */
350 353 if (ncsize == -1) {
351 354 /* calculate a reasonable size for the low water */
352 355 dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
353 356 ncsize = dnlc_nentries_low_water +
354 357 (dnlc_nentries_low_water / dnlc_low_water_divisor);
355 358 } else {
356 359 /* don't change the user specified ncsize */
357 360 dnlc_nentries_low_water =
358 361 ncsize - (ncsize / dnlc_low_water_divisor);
359 362 }
360 363 if (ncsize <= 0) {
361 364 doingcache = 0;
362 365 dnlc_dir_enable = 0; /* also disable directory caching */
363 366 ncsize = 0;
364 367 cmn_err(CE_NOTE, "name cache (dnlc) disabled");
365 368 return;
366 369 }
367 370 dnlc_max_nentries = ncsize * 2;
368 371 ncsize_onepercent = ncsize / 100;
369 372 ncsize_min_percent = ncsize_onepercent * 3;
370 373
371 374 /*
372 375 * Initialise the hash table.
373 376 * Compute hash size rounding to the next power of two.
374 377 */
375 378 nc_hashsz = ncsize / nc_hashavelen;
376 379 nc_hashsz = 1 << highbit(nc_hashsz);
377 380 nc_hashmask = nc_hashsz - 1;
378 381 nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
379 382 for (i = 0; i < nc_hashsz; i++) {
380 383 hp = (nc_hash_t *)&nc_hash[i];
381 384 mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
382 385 hp->hash_next = (ncache_t *)hp;
383 386 hp->hash_prev = (ncache_t *)hp;
384 387 }
385 388
386 389 /*
387 390 * Initialize rotors
388 391 */
389 392 dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
390 393
391 394 /*
392 395 * Set up the directory caching to use kmem_cache_alloc
393 396 * for its free space entries so that we can get a callback
394 397 * when the system is short on memory, to allow us to free
395 398  * up some memory. We don't use the constructor/destructor
396 399 * functions.
397 400 */
398 401 dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
399 402 sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
400 403 NULL, 0);
401 404
402 405 /*
403 406 * Initialise the head of the cached directory structures
404 407 */
405 408 mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
406 409 dc_head.dch_next = (dircache_t *)&dc_head;
407 410 dc_head.dch_prev = (dircache_t *)&dc_head;
408 411
409 412 /*
410 413 * Put a hold on the negative cache vnode so that it never goes away
411 414 * (VOP_INACTIVE isn't called on it).
412 415 */
413 416 vn_reinit(&negative_cache_vnode);
414 417
415 418 /*
416 419  * Initialise kstats - both the old compatibility raw kind and
417 420 * the more extensive named stats.
418 421 */
419 422 ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
420 423 sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
421 424 if (ksp) {
422 425 ksp->ks_data = (void *) &ncstats;
423 426 kstat_install(ksp);
424 427 }
425 428 ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
426 429 sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
427 430 if (ksp) {
428 431 ksp->ks_data = (void *) &ncs;
429 432 kstat_install(ksp);
430 433 }
431 434 }
432 435
433 436 /*
434 437 * Add a name to the directory cache.
435 438 */
436 439 void
437 440 dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
438 441 {
439 442 ncache_t *ncp;
440 443 nc_hash_t *hp;
441 444 uchar_t namlen;
442 445 int hash;
443 446
444 447 TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
445 448
446 449 if (!doingcache) {
447 450 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
448 451 "dnlc_enter_end:(%S) %d", "not caching", 0);
449 452 return;
450 453 }
451 454
452 455 /*
453 456 * Get a new dnlc entry. Assume the entry won't be in the cache
454 457 * and initialize it now
455 458 */
456 459 DNLCHASH(name, dp, hash, namlen);
457 460 if ((ncp = dnlc_get(namlen)) == NULL)
458 461 return;
459 462 ncp->dp = dp;
460 463 VN_HOLD_DNLC(dp);
461 464 ncp->vp = vp;
462 465 VN_HOLD_DNLC(vp);
463 466 bcopy(name, ncp->name, namlen + 1); /* name and null */
464 467 ncp->hash = hash;
465 468 hp = &nc_hash[hash & nc_hashmask];
466 469
467 470 mutex_enter(&hp->hash_lock);
468 471 if (dnlc_search(dp, name, namlen, hash) != NULL) {
469 472 mutex_exit(&hp->hash_lock);
470 473 ncstats.dbl_enters++;
471 474 ncs.ncs_dbl_enters.value.ui64++;
472 475 VN_RELE_DNLC(dp);
473 476 VN_RELE_DNLC(vp);
474 477 dnlc_free(ncp); /* crfree done here */
475 478 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
476 479 "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
477 480 return;
478 481 }
479 482 /*
480 483 * Insert back into the hash chain.
481 484 */
482 485 nc_inshash(ncp, hp);
483 486 mutex_exit(&hp->hash_lock);
484 487 ncstats.enters++;
485 488 ncs.ncs_enters.value.ui64++;
486 489 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
487 490 "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
488 491 }
489 492
490 493 /*
491 494 * Add a name to the directory cache.
492 495 *
493 496 * This function is basically identical with
494 497 * dnlc_enter(). The difference is that when the
495 498 * desired dnlc entry is found, the vnode in the
496 499 * ncache is compared with the vnode passed in.
497 500 *
498 501 * If they are not equal then the ncache is
499 502 * updated with the passed in vnode. Otherwise
500 503 * it just frees up the newly allocated dnlc entry.
501 504 */
502 505 void
503 506 dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
504 507 {
505 508 ncache_t *ncp;
506 509 ncache_t *tcp;
507 510 vnode_t *tvp;
508 511 nc_hash_t *hp;
509 512 int hash;
510 513 uchar_t namlen;
511 514
512 515 TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
513 516
514 517 if (!doingcache) {
515 518 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
516 519 "dnlc_update_end:(%S) %d", "not caching", 0);
517 520 return;
518 521 }
519 522
520 523 /*
521 524 * Get a new dnlc entry and initialize it now.
522 525 * If we fail to get a new entry, call dnlc_remove() to purge
523 526 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
524 527 * entry.
525 528 * Failure to clear an existing entry could result in false dnlc
526 529 * lookup (negative/stale entry).
527 530 */
528 531 DNLCHASH(name, dp, hash, namlen);
529 532 if ((ncp = dnlc_get(namlen)) == NULL) {
530 533 dnlc_remove(dp, name);
531 534 return;
532 535 }
533 536 ncp->dp = dp;
534 537 VN_HOLD_DNLC(dp);
535 538 ncp->vp = vp;
536 539 VN_HOLD_DNLC(vp);
537 540 bcopy(name, ncp->name, namlen + 1); /* name and null */
538 541 ncp->hash = hash;
539 542 hp = &nc_hash[hash & nc_hashmask];
540 543
541 544 mutex_enter(&hp->hash_lock);
542 545 if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
543 546 if (tcp->vp != vp) {
544 547 tvp = tcp->vp;
545 548 tcp->vp = vp;
546 549 mutex_exit(&hp->hash_lock);
547 550 VN_RELE_DNLC(tvp);
548 551 ncstats.enters++;
549 552 ncs.ncs_enters.value.ui64++;
550 553 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
551 554 "dnlc_update_end:(%S) %d", "done", ncstats.enters);
552 555 } else {
553 556 mutex_exit(&hp->hash_lock);
554 557 VN_RELE_DNLC(vp);
555 558 ncstats.dbl_enters++;
556 559 ncs.ncs_dbl_enters.value.ui64++;
557 560 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
558 561 "dnlc_update_end:(%S) %d",
559 562 "dbl enter", ncstats.dbl_enters);
560 563 }
561 564 VN_RELE_DNLC(dp);
562 565 dnlc_free(ncp); /* crfree done here */
563 566 return;
564 567 }
565 568 /*
566 569 * insert the new entry, since it is not in dnlc yet
567 570 */
568 571 nc_inshash(ncp, hp);
569 572 mutex_exit(&hp->hash_lock);
570 573 ncstats.enters++;
571 574 ncs.ncs_enters.value.ui64++;
572 575 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
573 576 "dnlc_update_end:(%S) %d", "done", ncstats.enters);
574 577 }
575 578
576 579 /*
577 580 * Look up a name in the directory name cache.
578 581 *
579 582 * Return a doubly-held vnode if found: one hold so that it may
580 583 * remain in the cache for other users, the other hold so that
581 584 * the cache is not re-cycled and the identity of the vnode is
582 585 * lost before the caller can use the vnode.
583 586 */
584 587 vnode_t *
585 588 dnlc_lookup(vnode_t *dp, const char *name)
586 589 {
587 590 ncache_t *ncp;
588 591 nc_hash_t *hp;
589 592 vnode_t *vp;
590 593 int hash, depth;
591 594 uchar_t namlen;
592 595
593 596 TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
594 597 "dnlc_lookup_start:dp %x name %s", dp, name);
595 598
596 599 if (!doingcache) {
597 600 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
598 601 "dnlc_lookup_end:%S %d vp %x name %s",
599 602 "not_caching", 0, NULL, name);
600 603 return (NULL);
601 604 }
602 605
603 606 DNLCHASH(name, dp, hash, namlen);
604 607 depth = 1;
605 608 hp = &nc_hash[hash & nc_hashmask];
606 609 mutex_enter(&hp->hash_lock);
607 610
608 611 for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
609 612 ncp = ncp->hash_next) {
610 613 if (ncp->hash == hash && /* fast signature check */
611 614 ncp->dp == dp &&
612 615 ncp->namlen == namlen &&
613 616 bcmp(ncp->name, name, namlen) == 0) {
614 617 /*
615 618 * Move this entry to the head of its hash chain
616 619 * if it's not already close.
617 620 */
618 621 if (depth > NC_MOVETOFRONT) {
619 622 ncache_t *next = ncp->hash_next;
620 623 ncache_t *prev = ncp->hash_prev;
621 624
622 625 prev->hash_next = next;
623 626 next->hash_prev = prev;
624 627 ncp->hash_next = next = hp->hash_next;
625 628 ncp->hash_prev = (ncache_t *)hp;
626 629 next->hash_prev = ncp;
627 630 hp->hash_next = ncp;
628 631
629 632 ncstats.move_to_front++;
630 633 }
631 634
632 635 /*
633 636 * Put a hold on the vnode now so its identity
634 637 * can't change before the caller has a chance to
635 638 * put a hold on it.
636 639 */
637 640 vp = ncp->vp;
638 641 VN_HOLD_CALLER(vp);
639 642 mutex_exit(&hp->hash_lock);
640 643 ncstats.hits++;
641 644 ncs.ncs_hits.value.ui64++;
642 645 if (vp == DNLC_NO_VNODE) {
643 646 ncs.ncs_neg_hits.value.ui64++;
644 647 }
645 648 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
646 649 "dnlc_lookup_end:%S %d vp %x name %s", "hit",
647 650 ncstats.hits, vp, name);
648 651 return (vp);
649 652 }
650 653 depth++;
651 654 }
652 655
653 656 mutex_exit(&hp->hash_lock);
654 657 ncstats.misses++;
655 658 ncs.ncs_misses.value.ui64++;
656 659 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
657 660 "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
658 661 NULL, name);
659 662 return (NULL);
660 663 }
661 664
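/*
 * Sketch of how a filesystem typically consults the interfaces above
 * (the toyfs_* names are hypothetical; ufs and nfs follow this shape):
 * a hit returns a held vnode, a DNLC_NO_VNODE hit is a cached negative
 * answer, and a miss falls through to a directory scan that then
 * primes the cache with either the result or a negative entry.
 */
extern int toyfs_dirscan(vnode_t *, const char *, vnode_t **);

static int
toyfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp)
{
	vnode_t *vp;

	if ((vp = dnlc_lookup(dvp, nm)) != NULL) {
		if (vp == DNLC_NO_VNODE) {
			VN_RELE(vp);		/* drop the lookup hold */
			return (ENOENT);	/* cached negative entry */
		}
		*vpp = vp;			/* returned already held */
		return (0);
	}
	if (toyfs_dirscan(dvp, nm, vpp) == 0) {
		dnlc_enter(dvp, nm, *vpp);	/* remember the hit */
		return (0);
	}
	dnlc_enter(dvp, nm, DNLC_NO_VNODE);	/* remember the miss */
	return (ENOENT);
}
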
662 665 /*
663 666 * Remove an entry in the directory name cache.
664 667 */
665 668 void
666 669 dnlc_remove(vnode_t *dp, const char *name)
667 670 {
668 671 ncache_t *ncp;
669 672 nc_hash_t *hp;
670 673 uchar_t namlen;
671 674 int hash;
672 675
673 676 if (!doingcache)
674 677 return;
675 678 DNLCHASH(name, dp, hash, namlen);
676 679 hp = &nc_hash[hash & nc_hashmask];
677 680
678 681 mutex_enter(&hp->hash_lock);
679 682 if (ncp = dnlc_search(dp, name, namlen, hash)) {
680 683 /*
681 684 * Free up the entry
682 685 */
683 686 nc_rmhash(ncp);
684 687 mutex_exit(&hp->hash_lock);
685 688 VN_RELE_DNLC(ncp->vp);
686 689 VN_RELE_DNLC(ncp->dp);
687 690 dnlc_free(ncp);
688 691 return;
689 692 }
690 693 mutex_exit(&hp->hash_lock);
691 694 }
692 695
693 696 /*
694 697 * Purge the entire cache.
695 698 */
696 699 void
697 700 dnlc_purge()
698 701 {
699 702 nc_hash_t *nch;
700 703 ncache_t *ncp;
701 704 int index;
702 705 int i;
703 706 vnode_t *nc_rele[DNLC_MAX_RELE];
704 707
705 708 if (!doingcache)
706 709 return;
707 710
708 711 ncstats.purges++;
709 712 ncs.ncs_purge_all.value.ui64++;
710 713
711 714 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
712 715 index = 0;
713 716 mutex_enter(&nch->hash_lock);
714 717 ncp = nch->hash_next;
715 718 while (ncp != (ncache_t *)nch) {
716 719 ncache_t *np;
717 720
718 721 np = ncp->hash_next;
719 722 nc_rele[index++] = ncp->vp;
720 723 nc_rele[index++] = ncp->dp;
721 724
722 725 nc_rmhash(ncp);
723 726 dnlc_free(ncp);
724 727 ncp = np;
725 728 ncs.ncs_purge_total.value.ui64++;
726 729 if (index == DNLC_MAX_RELE)
727 730 break;
728 731 }
729 732 mutex_exit(&nch->hash_lock);
730 733
731 734 /* Release holds on all the vnodes now that we have no locks */
732 735 for (i = 0; i < index; i++) {
733 736 VN_RELE_DNLC(nc_rele[i]);
734 737 }
735 738 if (ncp != (ncache_t *)nch) {
736 739 nch--; /* Do current hash chain again */
737 740 }
738 741 }
739 742 }
740 743
741 744 /*
742 745 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
743 746 * reference count goes to zero (the caller still holds a reference).
744 747 */
745 748 void
746 749 dnlc_purge_vp(vnode_t *vp)
747 750 {
748 751 nc_hash_t *nch;
749 752 ncache_t *ncp;
750 753 int index;
751 754 vnode_t *nc_rele[DNLC_MAX_RELE];
752 755
753 756 ASSERT(vp->v_count > 0);
754 757 if (vp->v_count_dnlc == 0) {
755 758 return;
756 759 }
757 760
758 761 if (!doingcache)
759 762 return;
760 763
761 764 ncstats.purges++;
762 765 ncs.ncs_purge_vp.value.ui64++;
763 766
764 767 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
765 768 index = 0;
766 769 mutex_enter(&nch->hash_lock);
767 770 ncp = nch->hash_next;
768 771 while (ncp != (ncache_t *)nch) {
769 772 ncache_t *np;
770 773
771 774 np = ncp->hash_next;
772 775 if (ncp->dp == vp || ncp->vp == vp) {
773 776 nc_rele[index++] = ncp->vp;
774 777 nc_rele[index++] = ncp->dp;
775 778 nc_rmhash(ncp);
776 779 dnlc_free(ncp);
777 780 ncs.ncs_purge_total.value.ui64++;
778 781 if (index == DNLC_MAX_RELE) {
779 782 ncp = np;
780 783 break;
781 784 }
782 785 }
783 786 ncp = np;
784 787 }
785 788 mutex_exit(&nch->hash_lock);
786 789
787 790 /* Release holds on all the vnodes now that we have no locks */
788 791 while (index) {
789 792 VN_RELE_DNLC(nc_rele[--index]);
790 793 }
791 794
792 795 if (vp->v_count_dnlc == 0) {
793 796 return;
794 797 }
795 798
796 799 if (ncp != (ncache_t *)nch) {
797 800 nch--; /* Do current hash chain again */
798 801 }
799 802 }
800 803 }
801 804
802 805 /*
803 806 * Purge cache entries referencing a vfsp. Caller supplies a count
804 807 * of entries to purge; up to that many will be freed. A count of
805 808 * zero indicates that all such entries should be purged. Returns
806 809 * the number of entries that were purged.
807 810 */
808 811 int
809 812 dnlc_purge_vfsp(vfs_t *vfsp, int count)
810 813 {
811 814 nc_hash_t *nch;
812 815 ncache_t *ncp;
813 816 int n = 0;
814 817 int index;
815 818 int i;
816 819 vnode_t *nc_rele[DNLC_MAX_RELE];
817 820
818 821 if (!doingcache)
819 822 return (0);
820 823
821 824 ncstats.purges++;
822 825 ncs.ncs_purge_vfs.value.ui64++;
823 826
824 827 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
825 828 index = 0;
826 829 mutex_enter(&nch->hash_lock);
827 830 ncp = nch->hash_next;
828 831 while (ncp != (ncache_t *)nch) {
829 832 ncache_t *np;
830 833
831 834 np = ncp->hash_next;
832 835 ASSERT(ncp->dp != NULL);
833 836 ASSERT(ncp->vp != NULL);
834 837 if ((ncp->dp->v_vfsp == vfsp) ||
835 838 (ncp->vp->v_vfsp == vfsp)) {
836 839 n++;
837 840 nc_rele[index++] = ncp->vp;
838 841 nc_rele[index++] = ncp->dp;
839 842 nc_rmhash(ncp);
840 843 dnlc_free(ncp);
841 844 ncs.ncs_purge_total.value.ui64++;
842 845 if (index == DNLC_MAX_RELE) {
843 846 ncp = np;
844 847 break;
845 848 }
846 849 if (count != 0 && n >= count) {
847 850 break;
848 851 }
849 852 }
850 853 ncp = np;
851 854 }
852 855 mutex_exit(&nch->hash_lock);
853 856 /* Release holds on all the vnodes now that we have no locks */
854 857 for (i = 0; i < index; i++) {
855 858 VN_RELE_DNLC(nc_rele[i]);
856 859 }
857 860 if (count != 0 && n >= count) {
858 861 return (n);
859 862 }
860 863 if (ncp != (ncache_t *)nch) {
861 864 nch--; /* Do current hash chain again */
862 865 }
863 866 }
864 867 return (n);
865 868 }
866 869
867 870 /*
868 871 * Purge 1 entry from the dnlc that is part of the filesystem(s)
869 872 * represented by 'vop'. The purpose of this routine is to allow
870 873 * users of the dnlc to free a vnode that is being held by the dnlc.
871 874 *
872 875  * If we find a vnode whose release will result in freeing the
873 876  * underlying vnode (its count was 1), return 1; return 0 if no
874 877  * appropriate vnode is found.
875 878 *
876 879 * Note, vop is not the 'right' identifier for a filesystem.
877 880 */
878 881 int
879 882 dnlc_fs_purge1(vnodeops_t *vop)
880 883 {
881 884 nc_hash_t *end;
882 885 nc_hash_t *hp;
883 886 ncache_t *ncp;
884 887 vnode_t *vp;
885 888
886 889 if (!doingcache)
887 890 return (0);
888 891
889 892 ncs.ncs_purge_fs1.value.ui64++;
890 893
891 894 /*
892 895 * Scan the dnlc entries looking for a likely candidate.
893 896 */
894 897 hp = end = dnlc_purge_fs1_rotor;
895 898
896 899 do {
897 900 if (++hp == &nc_hash[nc_hashsz])
898 901 hp = nc_hash;
899 902 dnlc_purge_fs1_rotor = hp;
900 903 if (hp->hash_next == (ncache_t *)hp)
901 904 continue;
902 905 mutex_enter(&hp->hash_lock);
903 906 for (ncp = hp->hash_prev;
904 907 ncp != (ncache_t *)hp;
905 908 ncp = ncp->hash_prev) {
906 909 vp = ncp->vp;
907 910 if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
908 911 vn_matchops(vp, vop))
909 912 break;
910 913 }
911 914 if (ncp != (ncache_t *)hp) {
912 915 nc_rmhash(ncp);
913 916 mutex_exit(&hp->hash_lock);
914 917 VN_RELE_DNLC(ncp->dp);
915 918 VN_RELE_DNLC(vp)
916 919 dnlc_free(ncp);
917 920 ncs.ncs_purge_total.value.ui64++;
918 921 return (1);
919 922 }
920 923 mutex_exit(&hp->hash_lock);
921 924 } while (hp != end);
922 925 return (0);
923 926 }
924 927
925 928 /*
926 929 * Utility routine to search for a cache entry. Return the
927 930 * ncache entry if found, NULL otherwise.
928 931 */
929 932 static ncache_t *
930 933 dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
931 934 {
932 935 nc_hash_t *hp;
933 936 ncache_t *ncp;
934 937
935 938 hp = &nc_hash[hash & nc_hashmask];
936 939
937 940 for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
938 941 if (ncp->hash == hash &&
939 942 ncp->dp == dp &&
940 943 ncp->namlen == namlen &&
941 944 bcmp(ncp->name, name, namlen) == 0)
942 945 return (ncp);
943 946 }
944 947 return (NULL);
945 948 }
946 949
947 950 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
948 951 #error ncache_t name length representation is too small
949 952 #endif
950 953
951 954 void
952 955 dnlc_reduce_cache(void *reduce_percent)
953 956 {
954 957 if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
955 958 dnlc_reduce_idle = 0;
956 959 if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
957 960 reduce_percent, TQ_NOSLEEP)) == NULL)
958 961 dnlc_reduce_idle = 1;
959 962 }
960 963 }
961 964
962 965 /*
963 966 * Get a new name cache entry.
964 967 * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
965 968 * is short then just return NULL. If we're over ncsize then kick off a
966 969 * thread to free some in use entries down to dnlc_nentries_low_water.
967 970 * Caller must initialise all fields except namlen.
968 971 * Component names are defined to be less than MAXNAMELEN
969 972 * which includes a null.
970 973 */
971 974 static ncache_t *
972 975 dnlc_get(uchar_t namlen)
973 976 {
974 977 ncache_t *ncp;
975 978
976 979 if (dnlc_nentries > dnlc_max_nentries) {
977 980 dnlc_max_nentries_cnt++; /* keep a statistic */
978 981 return (NULL);
979 982 }
980 983 ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
981 984 if (ncp == NULL) {
982 985 return (NULL);
983 986 }
984 987 ncp->namlen = namlen;
985 988 atomic_inc_32(&dnlc_nentries);
986 989 dnlc_reduce_cache(NULL);
987 990 return (ncp);
988 991 }
989 992
990 993 /*
991 994 * Taskq routine to free up name cache entries to reduce the
992 995 * cache size to the low water mark if "reduce_percent" is not provided.
993 996 * If "reduce_percent" is provided, reduce cache size by
994 997 * (ncsize_onepercent * reduce_percent).
995 998 */
996 999 /*ARGSUSED*/
997 1000 static void
998 1001 do_dnlc_reduce_cache(void *reduce_percent)
999 1002 {
1000 1003 nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
1001 1004 vnode_t *vp;
1002 1005 ncache_t *ncp;
1003 1006 int cnt;
1004 1007 uint_t low_water = dnlc_nentries_low_water;
1005 1008
1006 1009 if (reduce_percent) {
1007 1010 uint_t reduce_cnt;
1008 1011
1009 1012 /*
1010 1013 * Never try to reduce the current number
1011 1014 * of cache entries below 3% of ncsize.
1012 1015 */
1013 1016 if (dnlc_nentries <= ncsize_min_percent) {
1014 1017 dnlc_reduce_idle = 1;
1015 1018 return;
1016 1019 }
1017 1020 reduce_cnt = ncsize_onepercent *
1018 1021 (uint_t)(uintptr_t)reduce_percent;
1019 1022
1020 1023 if (reduce_cnt > dnlc_nentries ||
1021 1024 dnlc_nentries - reduce_cnt < ncsize_min_percent)
1022 1025 low_water = ncsize_min_percent;
1023 1026 else
1024 1027 low_water = dnlc_nentries - reduce_cnt;
1025 1028 }
1026 1029
1027 1030 do {
1028 1031 /*
1029 1032 * Find the first non empty hash queue without locking.
1030 1033 * Only look at each hash queue once to avoid an infinite loop.
1031 1034 */
1032 1035 do {
1033 1036 if (++hp == &nc_hash[nc_hashsz])
1034 1037 hp = nc_hash;
1035 1038 } while (hp->hash_next == (ncache_t *)hp && hp != start_hp);
1036 1039
1037 1040 /* return if all hash queues are empty. */
1038 1041 if (hp->hash_next == (ncache_t *)hp) {
1039 1042 dnlc_reduce_idle = 1;
1040 1043 return;
1041 1044 }
1042 1045
1043 1046 mutex_enter(&hp->hash_lock);
1044 1047 for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
1045 1048 ncp = ncp->hash_prev, cnt++) {
1046 1049 vp = ncp->vp;
1047 1050 /*
1048 1051 * A name cache entry with a reference count
1049 1052 * of one is only referenced by the dnlc.
1050 1053 * Also negative cache entries are purged first.
1051 1054 */
1052 1055 if (!vn_has_cached_data(vp) &&
1053 1056 ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
1054 1057 ncs.ncs_pick_heur.value.ui64++;
1055 1058 goto found;
1056 1059 }
1057 1060 /*
1058 1061 * Remove from the end of the chain if the
1059 1062 * chain is too long
1060 1063 */
1061 1064 if (cnt > dnlc_long_chain) {
1062 1065 ncp = hp->hash_prev;
1063 1066 ncs.ncs_pick_last.value.ui64++;
1064 1067 vp = ncp->vp;
1065 1068 goto found;
1066 1069 }
1067 1070 }
1068 1071 /* check for race and continue */
1069 1072 if (hp->hash_next == (ncache_t *)hp) {
1070 1073 mutex_exit(&hp->hash_lock);
1071 1074 continue;
1072 1075 }
1073 1076
1074 1077 ncp = hp->hash_prev; /* pick the last one in the hash queue */
1075 1078 ncs.ncs_pick_last.value.ui64++;
1076 1079 vp = ncp->vp;
1077 1080 found:
1078 1081 /*
1079 1082 * Remove from hash chain.
1080 1083 */
1081 1084 nc_rmhash(ncp);
1082 1085 mutex_exit(&hp->hash_lock);
1083 1086 VN_RELE_DNLC(vp);
1084 1087 VN_RELE_DNLC(ncp->dp);
1085 1088 dnlc_free(ncp);
1086 1089 } while (dnlc_nentries > low_water);
1087 1090
1088 1091 dnlc_free_rotor = hp;
1089 1092 dnlc_reduce_idle = 1;
1090 1093 }
1091 1094
1092 1095 /*
1093 1096 * Directory caching routines
1094 1097 * ==========================
1095 1098 *
1096 1099 * See dnlc.h for details of the interfaces below.
1097 1100 */
1098 1101
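/*
 * Sketch of the anchor life cycle from a filesystem's point of view
 * (the toyfs_* types and helpers are hypothetical): look in the cache
 * first; on DNOCACHE optionally build one by feeding every entry in
 * and marking the cache complete, so later lookups can answer DNOENT
 * authoritatively.
 */
typedef struct toyfs_dirent {
	struct toyfs_dirent *d_next;
	uint64_t d_handle;
	char d_name[MAXNAMELEN];
} toyfs_dirent_t;

typedef struct toyfs_dir {
	uint_t d_nentries;
	toyfs_dirent_t *d_list;		/* all entries, singly linked */
} toyfs_dir_t;

extern int toyfs_dirscan(toyfs_dir_t *, const char *, uint64_t *);

static int
toyfs_dir_find(dcanchor_t *dcap, toyfs_dir_t *dir, const char *nm,
    uint64_t *handlep)
{
	toyfs_dirent_t *de;

	switch (dnlc_dir_lookup(dcap, nm, handlep)) {
	case DFOUND:			/* cached: *handlep is valid */
		return (0);
	case DNOENT:			/* complete cache: name absent */
		return (ENOENT);
	default:			/* DNOCACHE: fall through */
		break;
	}

	if (dnlc_dir_start(dcap, dir->d_nentries) == DOK) {
		for (de = dir->d_list; de != NULL; de = de->d_next)
			(void) dnlc_dir_add_entry(dcap, de->d_name,
			    de->d_handle);
		dnlc_dir_complete(dcap);
	}
	return (toyfs_dirscan(dir, nm, handlep));	/* uncached path */
}
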
1099 1102 /*
1100 1103  * Look up an entry in a complete or partial directory cache.
1101 1104 */
1102 1105 dcret_t
1103 1106 dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
1104 1107 {
1105 1108 dircache_t *dcp;
1106 1109 dcentry_t *dep;
1107 1110 int hash;
1108 1111 int ret;
1109 1112 uchar_t namlen;
1110 1113
1111 1114 /*
1112 1115 * can test without lock as we are only a cache
1113 1116 */
1114 1117 if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1115 1118 ncs.ncs_dir_misses.value.ui64++;
1116 1119 return (DNOCACHE);
1117 1120 }
1118 1121
1119 1122 if (!dnlc_dir_enable) {
1120 1123 return (DNOCACHE);
1121 1124 }
1122 1125
1123 1126 mutex_enter(&dcap->dca_lock);
1124 1127 dcp = (dircache_t *)dcap->dca_dircache;
1125 1128 if (VALID_DIR_CACHE(dcp)) {
1126 1129 dcp->dc_actime = ddi_get_lbolt64();
1127 1130 DNLC_DIR_HASH(name, hash, namlen);
1128 1131 dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1129 1132 while (dep != NULL) {
1130 1133 if ((dep->de_hash == hash) &&
1131 1134 (namlen == dep->de_namelen) &&
1132 1135 bcmp(dep->de_name, name, namlen) == 0) {
1133 1136 *handle = dep->de_handle;
1134 1137 mutex_exit(&dcap->dca_lock);
1135 1138 ncs.ncs_dir_hits.value.ui64++;
1136 1139 return (DFOUND);
1137 1140 }
1138 1141 dep = dep->de_next;
1139 1142 }
1140 1143 if (dcp->dc_complete) {
1141 1144 ret = DNOENT;
1142 1145 } else {
1143 1146 ret = DNOCACHE;
1144 1147 }
1145 1148 mutex_exit(&dcap->dca_lock);
1146 1149 return (ret);
1147 1150 } else {
1148 1151 mutex_exit(&dcap->dca_lock);
1149 1152 ncs.ncs_dir_misses.value.ui64++;
1150 1153 return (DNOCACHE);
1151 1154 }
1152 1155 }
1153 1156
1154 1157 /*
1155 1158 * Start a new directory cache. An estimate of the number of
1156 1159  * entries is provided as a quick check to ensure the directory
1157 1160 * is cacheable.
1158 1161 */
1159 1162 dcret_t
1160 1163 dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
1161 1164 {
1162 1165 dircache_t *dcp;
1163 1166
1164 1167 if (!dnlc_dir_enable ||
1165 1168 (num_entries < dnlc_dir_min_size)) {
1166 1169 return (DNOCACHE);
1167 1170 }
1168 1171
1169 1172 if (num_entries > dnlc_dir_max_size) {
1170 1173 return (DTOOBIG);
1171 1174 }
1172 1175
1173 1176 mutex_enter(&dc_head.dch_lock);
1174 1177 mutex_enter(&dcap->dca_lock);
1175 1178
1176 1179 if (dcap->dca_dircache == DC_RET_LOW_MEM) {
1177 1180 dcap->dca_dircache = NULL;
1178 1181 mutex_exit(&dcap->dca_lock);
1179 1182 mutex_exit(&dc_head.dch_lock);
1180 1183 return (DNOMEM);
1181 1184 }
1182 1185
1183 1186 /*
1184 1187 * Check if there's currently a cache.
1185 1188 * This probably only occurs on a race.
1186 1189 */
1187 1190 if (dcap->dca_dircache != NULL) {
1188 1191 mutex_exit(&dcap->dca_lock);
1189 1192 mutex_exit(&dc_head.dch_lock);
1190 1193 return (DNOCACHE);
1191 1194 }
1192 1195
1193 1196 /*
1194 1197 * Allocate the dircache struct, entry and free space hash tables.
1195 1198 * These tables are initially just one entry but dynamically resize
1196 1199 * when entries and free space are added or removed.
1197 1200 */
1198 1201 if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
1199 1202 goto error;
1200 1203 }
1201 1204 if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
1202 1205 KM_NOSLEEP)) == NULL) {
1203 1206 goto error;
1204 1207 }
1205 1208 if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
1206 1209 KM_NOSLEEP)) == NULL) {
1207 1210 goto error;
1208 1211 }
1209 1212
1210 1213 dcp->dc_anchor = dcap; /* set back pointer to anchor */
1211 1214 dcap->dca_dircache = dcp;
1212 1215
1213 1216 /* add into head of global chain */
1214 1217 dcp->dc_next = dc_head.dch_next;
1215 1218 dcp->dc_prev = (dircache_t *)&dc_head;
1216 1219 dcp->dc_next->dc_prev = dcp;
1217 1220 dc_head.dch_next = dcp;
1218 1221
1219 1222 mutex_exit(&dcap->dca_lock);
1220 1223 mutex_exit(&dc_head.dch_lock);
1221 1224 ncs.ncs_cur_dirs.value.ui64++;
1222 1225 ncs.ncs_dirs_cached.value.ui64++;
1223 1226 return (DOK);
1224 1227 error:
1225 1228 if (dcp != NULL) {
1226 1229 if (dcp->dc_namehash) {
1227 1230 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
1228 1231 }
1229 1232 kmem_free(dcp, sizeof (dircache_t));
1230 1233 }
1231 1234 /*
1232 1235 * Must also kmem_free dcp->dc_freehash if more error cases are added
1233 1236 */
1234 1237 mutex_exit(&dcap->dca_lock);
1235 1238 mutex_exit(&dc_head.dch_lock);
1236 1239 ncs.ncs_dir_start_nm.value.ui64++;
1237 1240 return (DNOCACHE);
1238 1241 }
1239 1242
1240 1243 /*
1241 1244  * Add a directory entry to a partial or complete directory cache.
1242 1245 */
1243 1246 dcret_t
1244 1247 dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
1245 1248 {
1246 1249 dircache_t *dcp;
1247 1250 dcentry_t **hp, *dep;
1248 1251 int hash;
1249 1252 uint_t capacity;
1250 1253 uchar_t namlen;
1251 1254
1252 1255 /*
1253 1256 * Allocate the dcentry struct, including the variable
1254 1257 * size name. Note, the null terminator is not copied.
1255 1258 *
1256 1259 * We do this outside the lock to avoid possible deadlock if
1257 1260 * dnlc_dir_reclaim() is called as a result of memory shortage.
1258 1261 */
1259 1262 DNLC_DIR_HASH(name, hash, namlen);
1260 1263 dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1261 1264 if (dep == NULL) {
1262 1265 #ifdef DEBUG
1263 1266 /*
1264 1267 * The kmem allocator generates random failures for
1265 1268 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1266 1269 * So try again before we blow away a perfectly good cache.
1267 1270 * This is done not to cover an error but purely for
1268 1271  * performance when running a debug kernel.
1269 1272 * This random error only occurs in debug mode.
1270 1273 */
1271 1274 dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1272 1275 if (dep != NULL)
1273 1276 goto ok;
1274 1277 #endif
1275 1278 ncs.ncs_dir_add_nm.value.ui64++;
1276 1279 /*
1277 1280 * Free a directory cache. This may be the one we are
1278 1281 * called with.
1279 1282 */
1280 1283 dnlc_dir_reclaim(NULL);
1281 1284 dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1282 1285 if (dep == NULL) {
1283 1286 /*
1284 1287 * still no memory, better delete this cache
1285 1288 */
1286 1289 mutex_enter(&dcap->dca_lock);
1287 1290 dcp = (dircache_t *)dcap->dca_dircache;
1288 1291 if (VALID_DIR_CACHE(dcp)) {
1289 1292 dnlc_dir_abort(dcp);
1290 1293 dcap->dca_dircache = DC_RET_LOW_MEM;
1291 1294 }
1292 1295 mutex_exit(&dcap->dca_lock);
1293 1296 ncs.ncs_dir_addabort.value.ui64++;
1294 1297 return (DNOCACHE);
1295 1298 }
1296 1299 /*
1297 1300 * fall through as if the 1st kmem_alloc had worked
1298 1301 */
1299 1302 }
1300 1303 #ifdef DEBUG
1301 1304 ok:
1302 1305 #endif
1303 1306 mutex_enter(&dcap->dca_lock);
1304 1307 dcp = (dircache_t *)dcap->dca_dircache;
1305 1308 if (VALID_DIR_CACHE(dcp)) {
1306 1309 /*
1307 1310 * If the total number of entries goes above the max
1308 1311 * then free this cache
1309 1312 */
1310 1313 if ((dcp->dc_num_entries + dcp->dc_num_free) >
1311 1314 dnlc_dir_max_size) {
1312 1315 mutex_exit(&dcap->dca_lock);
1313 1316 dnlc_dir_purge(dcap);
1314 1317 kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1315 1318 ncs.ncs_dir_add_max.value.ui64++;
1316 1319 return (DTOOBIG);
1317 1320 }
1318 1321 dcp->dc_num_entries++;
1319 1322 capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
1320 1323 if (dcp->dc_num_entries >=
1321 1324 (capacity << dnlc_dir_hash_resize_shift)) {
1322 1325 dnlc_dir_adjust_nhash(dcp);
1323 1326 }
1324 1327 hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1325 1328
1326 1329 /*
1327 1330 * Initialise and chain in new entry
1328 1331 */
1329 1332 dep->de_handle = handle;
1330 1333 dep->de_hash = hash;
1331 1334 /*
1332 1335 * Note de_namelen is a uchar_t to conserve space
1333 1336 * and alignment padding. The max length of any
1334 1337 * pathname component is defined as MAXNAMELEN
1335 1338 * which is 256 (including the terminating null).
1336 1339 * So provided this doesn't change, we don't include the null,
1337 1340 * we always use bcmp to compare strings, and we don't
1338 1341 * start storing full names, then we are ok.
1339 1342 * The space savings is worth it.
1340 1343 */
1341 1344 dep->de_namelen = namlen;
1342 1345 bcopy(name, dep->de_name, namlen);
1343 1346 dep->de_next = *hp;
1344 1347 *hp = dep;
1345 1348 dcp->dc_actime = ddi_get_lbolt64();
1346 1349 mutex_exit(&dcap->dca_lock);
1347 1350 ncs.ncs_dir_num_ents.value.ui64++;
1348 1351 return (DOK);
1349 1352 } else {
1350 1353 mutex_exit(&dcap->dca_lock);
1351 1354 kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1352 1355 return (DNOCACHE);
1353 1356 }
1354 1357 }
1355 1358
1356 1359 /*
1357 1360 * Add free space to a partial or complete directory cache.
1358 1361 */
1359 1362 dcret_t
1360 1363 dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
1361 1364 {
1362 1365 dircache_t *dcp;
1363 1366 dcfree_t *dfp, **hp;
1364 1367 uint_t capacity;
1365 1368
1366 1369 /*
1367 1370 * We kmem_alloc outside the lock to avoid possible deadlock if
1368 1371 * dnlc_dir_reclaim() is called as a result of memory shortage.
1369 1372 */
1370 1373 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1371 1374 if (dfp == NULL) {
1372 1375 #ifdef DEBUG
1373 1376 /*
1374 1377 * The kmem allocator generates random failures for
1375 1378 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1376 1379 * So try again before we blow away a perfectly good cache.
1377 1380 * This random error only occurs in debug mode
1378 1381 */
1379 1382 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1380 1383 if (dfp != NULL)
1381 1384 goto ok;
1382 1385 #endif
1383 1386 ncs.ncs_dir_add_nm.value.ui64++;
1384 1387 /*
1385 1388 * Free a directory cache. This may be the one we are
1386 1389 * called with.
1387 1390 */
1388 1391 dnlc_dir_reclaim(NULL);
1389 1392 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1390 1393 if (dfp == NULL) {
1391 1394 /*
1392 1395 * still no memory, better delete this cache
1393 1396 */
1394 1397 mutex_enter(&dcap->dca_lock);
1395 1398 dcp = (dircache_t *)dcap->dca_dircache;
1396 1399 if (VALID_DIR_CACHE(dcp)) {
1397 1400 dnlc_dir_abort(dcp);
1398 1401 dcap->dca_dircache = DC_RET_LOW_MEM;
1399 1402 }
1400 1403 mutex_exit(&dcap->dca_lock);
1401 1404 ncs.ncs_dir_addabort.value.ui64++;
1402 1405 return (DNOCACHE);
1403 1406 }
1404 1407 /*
1405 1408 * fall through as if the 1st kmem_alloc had worked
1406 1409 */
1407 1410 }
1408 1411
1409 1412 #ifdef DEBUG
1410 1413 ok:
1411 1414 #endif
1412 1415 mutex_enter(&dcap->dca_lock);
1413 1416 dcp = (dircache_t *)dcap->dca_dircache;
1414 1417 if (VALID_DIR_CACHE(dcp)) {
1415 1418 if ((dcp->dc_num_entries + dcp->dc_num_free) >
1416 1419 dnlc_dir_max_size) {
1417 1420 mutex_exit(&dcap->dca_lock);
1418 1421 dnlc_dir_purge(dcap);
1419 1422 kmem_cache_free(dnlc_dir_space_cache, dfp);
1420 1423 ncs.ncs_dir_add_max.value.ui64++;
1421 1424 return (DTOOBIG);
1422 1425 }
1423 1426 dcp->dc_num_free++;
1424 1427 capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
1425 1428 if (dcp->dc_num_free >=
1426 1429 (capacity << dnlc_dir_hash_resize_shift)) {
1427 1430 dnlc_dir_adjust_fhash(dcp);
1428 1431 }
1429 1432 /*
1430 1433 * Initialise and chain a new entry
1431 1434 */
1432 1435 dfp->df_handle = handle;
1433 1436 dfp->df_len = len;
1434 1437 dcp->dc_actime = ddi_get_lbolt64();
1435 1438 hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1436 1439 dfp->df_next = *hp;
1437 1440 *hp = dfp;
1438 1441 mutex_exit(&dcap->dca_lock);
1439 1442 ncs.ncs_dir_num_ents.value.ui64++;
1440 1443 return (DOK);
1441 1444 } else {
1442 1445 mutex_exit(&dcap->dca_lock);
1443 1446 kmem_cache_free(dnlc_dir_space_cache, dfp);
1444 1447 return (DNOCACHE);
1445 1448 }
1446 1449 }
1447 1450
1448 1451 /*
1449 1452 * Mark a directory cache as complete.
1450 1453 */
1451 1454 void
1452 1455 dnlc_dir_complete(dcanchor_t *dcap)
1453 1456 {
1454 1457 dircache_t *dcp;
1455 1458
1456 1459 mutex_enter(&dcap->dca_lock);
1457 1460 dcp = (dircache_t *)dcap->dca_dircache;
1458 1461 if (VALID_DIR_CACHE(dcp)) {
1459 1462 dcp->dc_complete = B_TRUE;
1460 1463 }
1461 1464 mutex_exit(&dcap->dca_lock);
1462 1465 }
1463 1466
1464 1467 /*
1465 1468 * Internal routine to delete a partial or full directory cache.
1466 1469 * No additional locking needed.
1467 1470 */
1468 1471 static void
1469 1472 dnlc_dir_abort(dircache_t *dcp)
1470 1473 {
1471 1474 dcentry_t *dep, *nhp;
1472 1475 dcfree_t *fep, *fhp;
1473 1476 uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1474 1477 uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1475 1478 uint_t i;
1476 1479
1477 1480 /*
1478 1481 * Free up the cached name entries and hash table
1479 1482 */
1480 1483 for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1481 1484 nhp = dcp->dc_namehash[i];
1482 1485 while (nhp != NULL) { /* for each chained entry */
1483 1486 dep = nhp->de_next;
1484 1487 kmem_free(nhp, sizeof (dcentry_t) - 1 +
1485 1488 nhp->de_namelen);
1486 1489 nhp = dep;
1487 1490 }
1488 1491 }
1489 1492 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1490 1493
1491 1494 /*
1492 1495 * Free up the free space entries and hash table
1493 1496 */
1494 1497 for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1495 1498 fhp = dcp->dc_freehash[i];
1496 1499 while (fhp != NULL) { /* for each chained entry */
1497 1500 fep = fhp->df_next;
1498 1501 kmem_cache_free(dnlc_dir_space_cache, fhp);
1499 1502 fhp = fep;
1500 1503 }
1501 1504 }
1502 1505 kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1503 1506
1504 1507 /*
1505 1508 * Finally free the directory cache structure itself
1506 1509 */
1507 1510 ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1508 1511 dcp->dc_num_free);
1509 1512 kmem_free(dcp, sizeof (dircache_t));
1510 1513 ncs.ncs_cur_dirs.value.ui64--;
1511 1514 }
1512 1515
1513 1516 /*
1514 1517 * Remove a partial or complete directory cache
1515 1518 */
1516 1519 void
1517 1520 dnlc_dir_purge(dcanchor_t *dcap)
1518 1521 {
1519 1522 dircache_t *dcp;
1520 1523
1521 1524 mutex_enter(&dc_head.dch_lock);
1522 1525 mutex_enter(&dcap->dca_lock);
1523 1526 dcp = (dircache_t *)dcap->dca_dircache;
1524 1527 if (!VALID_DIR_CACHE(dcp)) {
1525 1528 mutex_exit(&dcap->dca_lock);
1526 1529 mutex_exit(&dc_head.dch_lock);
1527 1530 return;
1528 1531 }
1529 1532 dcap->dca_dircache = NULL;
1530 1533 /*
1531 1534 * Unchain from global list
1532 1535 */
1533 1536 dcp->dc_prev->dc_next = dcp->dc_next;
1534 1537 dcp->dc_next->dc_prev = dcp->dc_prev;
1535 1538 mutex_exit(&dcap->dca_lock);
1536 1539 mutex_exit(&dc_head.dch_lock);
1537 1540 dnlc_dir_abort(dcp);
1538 1541 }
1539 1542
1540 1543 /*
1541 1544 * Remove an entry from a complete or partial directory cache.
1542 1545 * Return the handle if it's non null.
1543 1546 */
1544 1547 dcret_t
1545 1548 dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
1546 1549 {
1547 1550 dircache_t *dcp;
1548 1551 dcentry_t **prevpp, *te;
1549 1552 uint_t capacity;
1550 1553 int hash;
1551 1554 int ret;
1552 1555 uchar_t namlen;
1553 1556
1554 1557 if (!dnlc_dir_enable) {
1555 1558 return (DNOCACHE);
1556 1559 }
1557 1560
1558 1561 mutex_enter(&dcap->dca_lock);
1559 1562 dcp = (dircache_t *)dcap->dca_dircache;
1560 1563 if (VALID_DIR_CACHE(dcp)) {
1561 1564 dcp->dc_actime = ddi_get_lbolt64();
1562 1565 if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
1563 1566 capacity = (dcp->dc_nhash_mask + 1) <<
1564 1567 dnlc_dir_hash_size_shift;
1565 1568 if (dcp->dc_num_entries <=
1566 1569 (capacity >> dnlc_dir_hash_resize_shift)) {
1567 1570 dnlc_dir_adjust_nhash(dcp);
1568 1571 }
1569 1572 }
1570 1573 DNLC_DIR_HASH(name, hash, namlen);
1571 1574 prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1572 1575 while (*prevpp != NULL) {
1573 1576 if (((*prevpp)->de_hash == hash) &&
1574 1577 (namlen == (*prevpp)->de_namelen) &&
1575 1578 bcmp((*prevpp)->de_name, name, namlen) == 0) {
1576 1579 if (handlep != NULL) {
1577 1580 *handlep = (*prevpp)->de_handle;
1578 1581 }
1579 1582 te = *prevpp;
1580 1583 *prevpp = (*prevpp)->de_next;
1581 1584 kmem_free(te, sizeof (dcentry_t) - 1 +
1582 1585 te->de_namelen);
1583 1586
1584 1587 /*
1585 1588 * If the total number of entries
1586 1589 * falls below half the minimum number
1587 1590 * of entries then free this cache.
1588 1591 */
1589 1592 if (--dcp->dc_num_entries <
1590 1593 (dnlc_dir_min_size >> 1)) {
1591 1594 mutex_exit(&dcap->dca_lock);
1592 1595 dnlc_dir_purge(dcap);
1593 1596 } else {
1594 1597 mutex_exit(&dcap->dca_lock);
1595 1598 }
1596 1599 ncs.ncs_dir_num_ents.value.ui64--;
1597 1600 return (DFOUND);
1598 1601 }
1599 1602 prevpp = &((*prevpp)->de_next);
1600 1603 }
1601 1604 if (dcp->dc_complete) {
1602 1605 ncs.ncs_dir_reme_fai.value.ui64++;
1603 1606 ret = DNOENT;
1604 1607 } else {
1605 1608 ret = DNOCACHE;
1606 1609 }
1607 1610 mutex_exit(&dcap->dca_lock);
1608 1611 return (ret);
1609 1612 } else {
1610 1613 mutex_exit(&dcap->dca_lock);
1611 1614 return (DNOCACHE);
1612 1615 }
1613 1616 }
1614 1617
1615 1618
1616 1619 /*
1617 1620 * Remove free space of at least the given length from a complete
1618 1621 * or partial directory cache.
1619 1622 */
1620 1623 dcret_t
1621 1624 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1622 1625 {
1623 1626 dircache_t *dcp;
1624 1627 dcfree_t **prevpp, *tfp;
1625 1628 uint_t fhtsize; /* free hash table size */
1626 1629 uint_t i;
1627 1630 uint_t capacity;
1628 1631 int ret;
1629 1632
1630 1633 if (!dnlc_dir_enable) {
1631 1634 return (DNOCACHE);
1632 1635 }
1633 1636
1634 1637 mutex_enter(&dcap->dca_lock);
1635 1638 dcp = (dircache_t *)dcap->dca_dircache;
1636 1639 if (VALID_DIR_CACHE(dcp)) {
1637 1640 dcp->dc_actime = ddi_get_lbolt64();
1638 1641 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1639 1642 capacity = (dcp->dc_fhash_mask + 1) <<
1640 1643 dnlc_dir_hash_size_shift;
1641 1644 if (dcp->dc_num_free <=
1642 1645 (capacity >> dnlc_dir_hash_resize_shift)) {
1643 1646 dnlc_dir_adjust_fhash(dcp);
1644 1647 }
1645 1648 }
1646 1649 /*
1647 1650 * Search for an entry of the appropriate size
1648 1651 * on a first fit basis.
1649 1652 */
1650 1653 fhtsize = dcp->dc_fhash_mask + 1;
1651 1654 for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1652 1655 prevpp = &(dcp->dc_freehash[i]);
1653 1656 while (*prevpp != NULL) {
1654 1657 if ((*prevpp)->df_len >= len) {
1655 1658 *handlep = (*prevpp)->df_handle;
1656 1659 tfp = *prevpp;
1657 1660 *prevpp = (*prevpp)->df_next;
1658 1661 dcp->dc_num_free--;
1659 1662 mutex_exit(&dcap->dca_lock);
1660 1663 kmem_cache_free(dnlc_dir_space_cache,
1661 1664 tfp);
1662 1665 ncs.ncs_dir_num_ents.value.ui64--;
1663 1666 return (DFOUND);
1664 1667 }
1665 1668 prevpp = &((*prevpp)->df_next);
1666 1669 }
1667 1670 }
1668 1671 if (dcp->dc_complete) {
1669 1672 ret = DNOENT;
1670 1673 } else {
1671 1674 ret = DNOCACHE;
1672 1675 }
1673 1676 mutex_exit(&dcap->dca_lock);
1674 1677 return (ret);
1675 1678 } else {
1676 1679 mutex_exit(&dcap->dca_lock);
1677 1680 return (DNOCACHE);
1678 1681 }
1679 1682 }
1680 1683
1681 1684 /*
1682 1685 * Remove free space with the given handle from a complete or partial
1683 1686 * directory cache.
1684 1687 */
1685 1688 dcret_t
1686 1689 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1687 1690 {
1688 1691 dircache_t *dcp;
1689 1692 dcfree_t **prevpp, *tfp;
1690 1693 uint_t capacity;
1691 1694 int ret;
1692 1695
1693 1696 if (!dnlc_dir_enable) {
1694 1697 return (DNOCACHE);
1695 1698 }
1696 1699
1697 1700 mutex_enter(&dcap->dca_lock);
1698 1701 dcp = (dircache_t *)dcap->dca_dircache;
1699 1702 if (VALID_DIR_CACHE(dcp)) {
1700 1703 dcp->dc_actime = ddi_get_lbolt64();
1701 1704 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1702 1705 capacity = (dcp->dc_fhash_mask + 1) <<
1703 1706 dnlc_dir_hash_size_shift;
1704 1707 if (dcp->dc_num_free <=
1705 1708 (capacity >> dnlc_dir_hash_resize_shift)) {
1706 1709 dnlc_dir_adjust_fhash(dcp);
1707 1710 }
1708 1711 }
1709 1712
1710 1713 /*
1711 1714 * search for the exact entry
1712 1715 */
1713 1716 prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1714 1717 while (*prevpp != NULL) {
1715 1718 if ((*prevpp)->df_handle == handle) {
1716 1719 tfp = *prevpp;
1717 1720 *prevpp = (*prevpp)->df_next;
1718 1721 dcp->dc_num_free--;
1719 1722 mutex_exit(&dcap->dca_lock);
1720 1723 kmem_cache_free(dnlc_dir_space_cache, tfp);
1721 1724 ncs.ncs_dir_num_ents.value.ui64--;
1722 1725 return (DFOUND);
1723 1726 }
1724 1727 prevpp = &((*prevpp)->df_next);
1725 1728 }
1726 1729 if (dcp->dc_complete) {
1727 1730 ncs.ncs_dir_rems_fai.value.ui64++;
1728 1731 ret = DNOENT;
1729 1732 } else {
1730 1733 ret = DNOCACHE;
1731 1734 }
1732 1735 mutex_exit(&dcap->dca_lock);
1733 1736 return (ret);
1734 1737 } else {
1735 1738 mutex_exit(&dcap->dca_lock);
1736 1739 return (DNOCACHE);
1737 1740 }
1738 1741 }
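For context, a hypothetical caller of the two space-removal routines might look like the sketch below. The function name, errno choices, and fallback policy are illustrative assumptions, not taken from any filesystem in the gate.

/*
 * Hypothetical caller sketch: a filesystem adding a directory
 * entry of length "reclen" first tries to reuse a cached free
 * slot before extending the directory on disk.
 */
static int
fs_reuse_dirslot(dcanchor_t *dcap, uint_t reclen, uint64_t *handlep)
{
	switch (dnlc_dir_rem_space_by_len(dcap, reclen, handlep)) {
	case DFOUND:
		/* a free slot was claimed; write the entry there */
		return (0);
	case DNOENT:
		/* complete cache: no slot big enough, extend directory */
		return (ENOSPC);
	default:
		/* DNOCACHE: no usable cache, fall back to a disk scan */
		return (EAGAIN);
	}
}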
1739 1742
1740 1743 /*
1741 1744 * Update the handle of a directory cache entry.
1742 1745 */
1743 1746 dcret_t
1744 1747 dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
1745 1748 {
1746 1749 dircache_t *dcp;
1747 1750 dcentry_t *dep;
1748 1751 int hash;
1749 1752 int ret;
1750 1753 uchar_t namlen;
1751 1754
1752 1755 if (!dnlc_dir_enable) {
1753 1756 return (DNOCACHE);
1754 1757 }
1755 1758
1756 1759 mutex_enter(&dcap->dca_lock);
1757 1760 dcp = (dircache_t *)dcap->dca_dircache;
1758 1761 if (VALID_DIR_CACHE(dcp)) {
1759 1762 dcp->dc_actime = ddi_get_lbolt64();
1760 1763 DNLC_DIR_HASH(name, hash, namlen);
1761 1764 dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1762 1765 while (dep != NULL) {
1763 1766 if ((dep->de_hash == hash) &&
1764 1767 (namlen == dep->de_namelen) &&
1765 1768 bcmp(dep->de_name, name, namlen) == 0) {
1766 1769 dep->de_handle = handle;
1767 1770 mutex_exit(&dcap->dca_lock);
1768 1771 return (DFOUND);
1769 1772 }
1770 1773 dep = dep->de_next;
1771 1774 }
1772 1775 if (dcp->dc_complete) {
1773 1776 ncs.ncs_dir_upd_fail.value.ui64++;
1774 1777 ret = DNOENT;
1775 1778 } else {
1776 1779 ret = DNOCACHE;
1777 1780 }
1778 1781 mutex_exit(&dcap->dca_lock);
1779 1782 return (ret);
1780 1783 } else {
1781 1784 mutex_exit(&dcap->dca_lock);
1782 1785 return (DNOCACHE);
1783 1786 }
1784 1787 }
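A hypothetical use of dnlc_dir_update(): after an entry is moved, say by directory compaction, the caller refreshes the cached handle. DNOENT from a complete cache means the cache and the directory disagree, so the sketch below discards the cache; the function name and recovery policy are assumptions for illustration.

/*
 * Hypothetical caller sketch: "name" was just moved to "newhandle".
 * If a complete cache claims the name does not exist, the cache is
 * stale, so throw it away rather than serve wrong answers.
 */
static void
fs_move_dirent(dcanchor_t *dcap, const char *name, uint64_t newhandle)
{
	if (dnlc_dir_update(dcap, name, newhandle) == DNOENT)
		dnlc_dir_purge(dcap);
}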
1785 1788
1786 1789 void
1787 1790 dnlc_dir_fini(dcanchor_t *dcap)
1788 1791 {
1789 1792 dircache_t *dcp;
1790 1793
1791 1794 mutex_enter(&dc_head.dch_lock);
1792 1795 mutex_enter(&dcap->dca_lock);
1793 1796 dcp = (dircache_t *)dcap->dca_dircache;
1794 1797 if (VALID_DIR_CACHE(dcp)) {
1795 1798 /*
1796 1799 * Unchain from global list
1797 1800 */
1798 1801 ncs.ncs_dir_finipurg.value.ui64++;
1799 1802 dcp->dc_prev->dc_next = dcp->dc_next;
1800 1803 dcp->dc_next->dc_prev = dcp->dc_prev;
1801 1804 } else {
1802 1805 dcp = NULL;
1803 1806 }
1804 1807 dcap->dca_dircache = NULL;
1805 1808 mutex_exit(&dcap->dca_lock);
1806 1809 mutex_exit(&dc_head.dch_lock);
1807 1810 mutex_destroy(&dcap->dca_lock);
1808 1811 if (dcp) {
1809 1812 dnlc_dir_abort(dcp);
1810 1813 }
1811 1814 }
1812 1815
1813 1816 /*
1814 1817 * Reclaim callback for dnlc directory caching.
1815 1818 * Invoked by the kernel memory allocator when memory gets tight.
1816 1819 * This is a pretty serious condition and can easily lead to system
1817 1820 * hangs if not enough space is returned.
1818 1821 *
1819 1822 * Deciding which directory (or directories) to purge is tricky.
1820 1823 * Purging everything is overkill, but purging just the oldest-used
1821 1824 * directory was found to lead to hangs. The largest cached directories use the
1822 1825 * most memory, but take the most effort to rebuild, whereas the smaller
1823 1826 * ones have little value and give back little space. So what to do?
1824 1827 *
1825 1828 * The current policy is to continue purging the oldest used directories
1826 1829 * until at least dnlc_dir_min_reclaim directory entries have been purged.
1827 1830 */
1828 1831 /*ARGSUSED*/
1829 1832 static void
1830 1833 dnlc_dir_reclaim(void *unused)
1831 1834 {
1832 1835 dircache_t *dcp, *oldest;
1833 1836 uint_t dirent_cnt = 0;
1834 1837
1835 1838 mutex_enter(&dc_head.dch_lock);
1836 1839 while (dirent_cnt < dnlc_dir_min_reclaim) {
1837 1840 dcp = dc_head.dch_next;
1838 1841 oldest = NULL;
1839 1842 while (dcp != (dircache_t *)&dc_head) {
1840 1843 if (oldest == NULL) {
1841 1844 oldest = dcp;
1842 1845 } else {
1843 1846 if (dcp->dc_actime < oldest->dc_actime) {
1844 1847 oldest = dcp;
1845 1848 }
1846 1849 }
1847 1850 dcp = dcp->dc_next;
1848 1851 }
1849 1852 if (oldest == NULL) {
1850 1853 /* nothing to delete */
1851 1854 mutex_exit(&dc_head.dch_lock);
1852 1855 return;
1853 1856 }
1854 1857 /*
1855 1858 * remove from directory chain and purge
1856 1859 */
1857 1860 oldest->dc_prev->dc_next = oldest->dc_next;
1858 1861 oldest->dc_next->dc_prev = oldest->dc_prev;
1859 1862 mutex_enter(&oldest->dc_anchor->dca_lock);
1860 1863 /*
1861 1864 * If this was the last directory cache then it must be too large.
1862 1865 * Mark it as such by saving a special dircache_t
1863 1866 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1864 1867 * will be presented to the caller of dnlc_dir_start().
1865 1868 */
1866 1869 if (oldest->dc_next == oldest->dc_prev) {
1867 1870 oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1868 1871 ncs.ncs_dir_rec_last.value.ui64++;
1869 1872 } else {
1870 1873 oldest->dc_anchor->dca_dircache = NULL;
1871 1874 ncs.ncs_dir_recl_any.value.ui64++;
1872 1875 }
1873 1876 mutex_exit(&oldest->dc_anchor->dca_lock);
1874 1877 dirent_cnt += oldest->dc_num_entries;
1875 1878 dnlc_dir_abort(oldest);
1876 1879 }
1877 1880 mutex_exit(&dc_head.dch_lock);
1878 1881 }
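To make the entry-counting policy concrete, a worked example with an assumed tunable value (not the shipped default):

/*
 * Illustration only: if dnlc_dir_min_reclaim were 100000 entries,
 * purging one directory cache holding 150000 entries would satisfy
 * the request in a single pass, whereas a population of 1000-entry
 * caches would need 100 passes, each one a full walk of the chain
 * to find the oldest.
 */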
1879 1882
1880 1883 /*
1881 1884 * Dynamically grow or shrink the size of the name hash table
1882 1885 */
1883 1886 static void
1884 1887 dnlc_dir_adjust_nhash(dircache_t *dcp)
1885 1888 {
1886 1889 dcentry_t **newhash, *dep, **nhp, *tep;
1887 1890 uint_t newsize;
1888 1891 uint_t oldsize;
1889 1892 uint_t newsizemask;
1890 1893 int i;
1891 1894
1892 1895 /*
1893 1896 * Allocate new hash table
1894 1897 */
1895 1898 newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1896 1899 newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1897 1900 if (newhash == NULL) {
1898 1901 /*
1899 1902 * System is short on memory, so just return.
1900 1903 * Note, the old hash table is still usable.
1901 1904 * This return is unlikely to occur repeatedly, because
1902 1905 * either some other directory caches will be reclaimed
1903 1906 * due to memory shortage, thus freeing memory, or this
1904 1907 * directory cache will be reclaimed.
1905 1908 */
1906 1909 return;
1907 1910 }
1908 1911 oldsize = dcp->dc_nhash_mask + 1;
1909 1912 dcp->dc_nhash_mask = newsizemask = newsize - 1;
1910 1913
1911 1914 /*
1912 1915 * Move entries from the old table to the new
1913 1916 */
1914 1917 for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1915 1918 dep = dcp->dc_namehash[i];
1916 1919 while (dep != NULL) { /* for each chained entry */
1917 1920 tep = dep;
1918 1921 dep = dep->de_next;
1919 1922 nhp = &newhash[tep->de_hash & newsizemask];
1920 1923 tep->de_next = *nhp;
1921 1924 *nhp = tep;
1922 1925 }
1923 1926 }
1924 1927
1925 1928 /*
1926 1929 * delete old hash table and set new one in place
1927 1930 */
1928 1931 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1929 1932 dcp->dc_namehash = newhash;
1930 1933 }
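A sizing note on the rehash above, using an assumed shift value:

/*
 * Illustration only: with dnlc_dir_hash_size_shift == 3 (assumed),
 * a cache holding 4096 names is rehashed into
 *
 *	newsize = 4096 >> 3 = 512 buckets,
 *
 * so whether growing or shrinking, the table aims for an average
 * chain length of 2^dnlc_dir_hash_size_shift (here 8) per bucket.
 */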
1931 1934
1932 1935 /*
1933 1936 * Dynamically grow or shrink the size of the free space hash table
1934 1937 */
1935 1938 static void
1936 1939 dnlc_dir_adjust_fhash(dircache_t *dcp)
1937 1940 {
1938 1941 dcfree_t **newhash, *dfp, **nhp, *tfp;
1939 1942 uint_t newsize;
1940 1943 uint_t oldsize;
1941 1944 int i;
1942 1945
1943 1946 /*
1944 1947 * Allocate new hash table
1945 1948 */
1946 1949 newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1947 1950 newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1948 1951 if (newhash == NULL) {
1949 1952 /*
1950 1953 * System is short on memory, so just return.
1951 1954 * Note, the old hash table is still usable.
1952 1955 * This return is unlikely to occur repeatedly, because
1953 1956 * either some other directory caches will be reclaimed
1954 1957 * due to memory shortage, thus freeing memory, or this
1955 1958 * directory cache will be reclaimed.
1956 1959 */
1957 1960 return;
1958 1961 }
1959 1962 oldsize = dcp->dc_fhash_mask + 1;
1960 1963 dcp->dc_fhash_mask = newsize - 1;
1961 1964
1962 1965 /*
1963 1966 * Move entries from the old table to the new
1964 1967 */
1965 1968 for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1966 1969 dfp = dcp->dc_freehash[i];
1967 1970 while (dfp != NULL) { /* for each chained entry */
1968 1971 tfp = dfp;
1969 1972 dfp = dfp->df_next;
1970 1973 nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
1971 1974 tfp->df_next = *nhp;
1972 1975 *nhp = tfp;
1973 1976 }
1974 1977 }
1975 1978
1976 1979 /*
1977 1980 * delete old hash table and set new one in place
1978 1981 */
1979 1982 kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
1980 1983 dcp->dc_freehash = newhash;
1981 1984 }