1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39 #ifndef _SYS_DNLC_H
40 #define _SYS_DNLC_H
41
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45
46 #include <sys/kstat.h>
47
48 /*
49 * DNLC - Directory name lookup cache.
50 * There are now two sorts of name caching:
51 *
52 * Standard dnlc: This original cache holds recent mappings
53 * of <directory vnode, name> to vnode mappings.
54 *
55 * Directory caches: Entire large directories can be cached, subject to
56 * memory availability and tunables. A directory cache
57 * anchor point must be provided in the xxnode for
58 * a directory.
59 */
60
61
62 /*
63 * Standard dnlc
64 * =============
65 */
66
67 /*
68 * This structure describes the elements in the cache of recent
69 * names looked up.
70 *
71 * Note namlen is a uchar_t to conserve space
72 * and alignment padding. The max length of any
73 * pathname component is defined as MAXNAMELEN
74 * which is 256 (including the terminating null).
75 * So provided this doesn't change, we don't include the null,
76 * we always use bcmp to compare strings, and we don't start
77 * storing full names, then we are ok. The space savings are worth it.
78 */
79 typedef struct ncache {
80 struct ncache *hash_next; /* hash chain, MUST BE FIRST */
81 struct ncache *hash_prev;
82 struct vnode *vp; /* vnode the name refers to */
83 struct vnode *dp; /* vnode of parent of name */
84 int hash; /* hash signature */
85 uchar_t namlen; /* length of name */
86 char name[1]; /* segment name - null terminated */
87 } ncache_t;
88
89 /*
90 * Hash table bucket structure of name cache entries for fast lookup.
91 */
92 typedef struct nc_hash {
93 ncache_t *hash_next;
94 ncache_t *hash_prev;
95 kmutex_t hash_lock;
96 } nc_hash_t;
97
98 /*
99 * Statistics on name cache
100 * Not protected by locks
101 */
102 /*
103 * ncstats has been deprecated, due to the integer size of the counters
104 * which can easily overflow in the dnlc.
105 * It is maintained (at some expense) for compatability.
106 * The preferred interface is the kstat accessible nc_stats below, ehich
107 * is actually shared with directory caching.
108 */
109 struct ncstats {
110 int hits; /* hits that we can really use */
111 int misses; /* cache misses */
112 int enters; /* number of enters done */
113 int dbl_enters; /* number of enters tried when already cached */
114 int long_enter; /* deprecated, no longer accounted */
115 int long_look; /* deprecated, no longer accounted */
116 int move_to_front; /* entry moved to front of hash chain */
117 int purges; /* number of purges of cache */
118 };
119
120 struct nc_stats {
121 kstat_named_t ncs_hits; /* cache hits */
122 kstat_named_t ncs_misses; /* cache misses */
123 kstat_named_t ncs_neg_hits; /* negative cache hits */
124 kstat_named_t ncs_enters; /* enters */
125 kstat_named_t ncs_dbl_enters; /* enters when entry already cached */
126 kstat_named_t ncs_purge_total; /* total entries prurged */
127 kstat_named_t ncs_purge_all; /* dnlc_purge() calls */
128 kstat_named_t ncs_purge_vp; /* dnlc_purge_vp() calls */
129 kstat_named_t ncs_purge_vfs; /* dnlc_purge_vfs() calls */
130 kstat_named_t ncs_purge_fs1; /* dnlc_purge_fs1() calls */
131 kstat_named_t ncs_pick_free; /* found a free ncache */
132 kstat_named_t ncs_pick_heur; /* found ncache w/ NULL vpages */
133 kstat_named_t ncs_pick_last; /* found last ncache on chain */
134
135 /* directory caching stats */
136
137 kstat_named_t ncs_dir_hits; /* dir cache hits */
138 kstat_named_t ncs_dir_misses; /* dir cache misses */
139 kstat_named_t ncs_cur_dirs; /* current # directories cached */
140 kstat_named_t ncs_dir_num_ents; /* current # entries cached */
141 kstat_named_t ncs_dirs_cached; /* total # directories cached */
142 kstat_named_t ncs_dir_start_nm; /* dir start no memory */
143 kstat_named_t ncs_dir_add_nm; /* add entry/space - no memory */
144 kstat_named_t ncs_dir_addabort; /* add entry/space - abort */
145 kstat_named_t ncs_dir_add_max; /* add entry/space - max exceeded */
146 kstat_named_t ncs_dir_reme_fai; /* remove entry fail */
147 kstat_named_t ncs_dir_rems_fai; /* remove space fail */
148 kstat_named_t ncs_dir_upd_fail; /* update space fail */
149 kstat_named_t ncs_dir_finipurg; /* fini purges */
150 kstat_named_t ncs_dir_rec_last; /* reclaim last */
151 kstat_named_t ncs_dir_recl_any; /* reclaim any */
152 };
153
154 /*
155 * The dnlc hashing function.
156 * Although really a kernel macro we export it to allow validation
157 * of ncache_t entries by mdb. Note, mdb can handle the ASSERT.
158 *
159 * 'hash' and 'namlen' must be l-values. A check is made to ensure
160 * the name length fits into an unsigned char (see ncache_t).
161 */
162 #define DNLCHASH(name, dvp, hash, namlen) \
163 { \
164 char Xc; \
165 const char *Xcp; \
166 hash = (int)((uintptr_t)(dvp)) >> 8; \
167 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \
168 (hash) = ((hash) << 4) + (hash) + Xc; \
169 ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1)); \
170 (namlen) = Xcp - (name); \
171 }
172
173 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
174
175 #include <sys/vfs.h>
176 #include <sys/vnode.h>
177
178 extern volatile int ncsize; /* set in param_init() # of dnlc entries */
179 extern vnode_t negative_cache_vnode;
180 #define DNLC_NO_VNODE &negative_cache_vnode
181
182 void dnlc_init(void);
183 void dnlc_enter(vnode_t *, const char *, vnode_t *);
184 void dnlc_update(vnode_t *, const char *, vnode_t *);
185 vnode_t *dnlc_lookup(vnode_t *, const char *);
186 void dnlc_purge(void);
187 void dnlc_purge_vp(vnode_t *);
188 int dnlc_purge_vfsp(vfs_t *, int);
189 void dnlc_remove(vnode_t *, const char *);
190 int dnlc_fs_purge1(struct vnodeops *);
191 void dnlc_reduce_cache(void *);
192
193 #endif /* defined(_KERNEL) */
194
195
196 /*
197 * Directory caching interfaces
198 * ============================
199 */
200
201 /*
202 * Typically for large directories, the file names will be the same or
203 * at least similar lengths. So there's no point in anything more elaborate
204 * than a simple unordered linked list of free space entries.
205 * For small directories the name length distribution doesn't really matter.
206 */
207 typedef struct dcfree {
208 uint64_t df_handle; /* fs supplied handle */
209 struct dcfree *df_next; /* link to next free entry in bucket */
210 uint_t df_len; /* length of free entry */
211 } dcfree_t;
212
213 typedef struct dcentry {
214 uint64_t de_handle; /* fs supplied and returned data */
215 struct dcentry *de_next; /* link to next name entry in bucket */
216 int de_hash; /* hash signature */
217 uchar_t de_namelen; /* length of name excluding null */
218 char de_name[1]; /* null terminated name */
219 } dcentry_t;
220
221 typedef struct dircache {
222 struct dircache *dc_next; /* chain - for purge purposes */
223 struct dircache *dc_prev; /* chain - for purge purposes */
224 int64_t dc_actime; /* dir access time, from lbolt64 */
225 dcentry_t **dc_namehash; /* entry hash table pointer */
226 dcfree_t **dc_freehash; /* free entry hash table pointer */
227 uint_t dc_num_entries; /* no of named entries */
228 uint_t dc_num_free; /* no of free space entries */
229 uint_t dc_nhash_mask; /* name hash table size - 1 */
230 uint_t dc_fhash_mask; /* free space hash table size - 1 */
231 struct dcanchor *dc_anchor; /* back ptr to anchor */
232 boolean_t dc_complete; /* cache complete boolean */
233 } dircache_t;
234
235 typedef struct dcanchor {
236 void *dca_dircache; /* pointer to directory cache */
237 kmutex_t dca_lock; /* protects the directory cache */
238 } dcanchor_t;
239
240 /*
241 * Head struct for doubly linked chain of dircache_t
242 * The next and prev fields must match those of a dircache_t
243 */
244 typedef struct {
245 dircache_t *dch_next; /* next in chain */
246 dircache_t *dch_prev; /* prev in chain */
247 kmutex_t dch_lock; /* lock for the chain */
248 } dchead_t;
249
250
251 #if defined(_KERNEL)
252
253 /*
254 * Status returns from the directory cache interfaces
255 */
256 typedef enum {
257 DOK, /* operation sucessful */
258 DNOCACHE, /* there is no cache */
259 DFOUND, /* entry found */
260 DNOENT, /* no entry found */
261 DTOOBIG, /* exceeds tunable dnlc_max_dir_cache */
262 DNOMEM /* no memory */
263 } dcret_t;
264
265 /*
266 * dnlc_dir_start() requests that a directory be cached.
267 * This must be called initially to enable caching on a directory.
268 * After a successful call, directory entries and free space can be
269 * added (see below) until the directory is marked complete.
270 * "num_entries" is an estimate of the current number of
271 * directory entries. The request is rejected with DNOCACHE
272 * if num_entries falls below the tunable dnlc_dir_min_size (see
273 * below), and rejected with DTOOBIG if it's above dnlc_dir_max_size.
274 * Returns DOK, DNOCACHE, DTOOBIG, DNOMEM.
275 *
276 * Due to memory shortages, directory caches can be purged at
277 * any time. If the last directory cache is purged due to memory
278 * shortage, then the directory cache is marked internally
279 * as "no memory". Future returns will all be DNOCACHE until
280 * the next dnlc_start_dir() which will return DNOMEM once.
281 * This memory shortage may only be transient. It's up to the
282 * file system as to what to do about this condition, but an
283 * attempt to immediately re-build the cache will very likely
284 * lead to the same shortage of memory and a thrashing situation.
285 *
286 * It's file system policy as to when and what size directories to cache.
287 */
288 dcret_t dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries);
289
290 /*
291 * dnlc_dir_add_entry() adds an entry (name and handle) into a
292 * partial or complete cache. "handle" is a file system specific
293 * quantity that is returned on calls to dnlc_dir_lookup() - see below.
294 * For example, "handle" for ufs holds the inumber and a directory
295 * entry offset. Returns DOK, DNOCACHE, DTOOBIG.
296 */
297 dcret_t dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle);
298
299 /*
300 * dnlc_dir_add_space adds free space (length and file system specific
301 * handle) into a partial or complete cache. "handle" is a file
302 * system specific quantity that is returned on calls to
303 * dnlc_dir_rem_space_by_len(). For example, "handle" for ufs holds
304 * the directory entry offset. Returns DOK, DNOCACHE, DTOOBIG.
305 */
306 dcret_t dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle);
307
308 /*
309 * dnlc_dir_complete() indicates the previously partial cache is now complete.
310 */
311 void dnlc_dir_complete(dcanchor_t *dcap);
312
313 /*
314 * dnlc_dir_purge() deletes a partial or complete directory cache
315 */
316 void dnlc_dir_purge(dcanchor_t *dcap);
317
318 /*
319 * dnlc_dir_lookup() lookups a file name in a directory cache
320 * and returns the file system handle specified on dnlc_dir_add_entry()
321 * in "handlep". Returns DFOUND, DNOENT, DNOCACHE.
322 */
323 dcret_t dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handlep);
324
325 /*
326 * dnlc_dir_update() amends the handle for an entry in a directory cache
327 * "handle" is the new file system specific handle for the file "name".
328 * Returns DFOUND, DNOENT, DNOCACHE.
329 */
330 dcret_t dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle);
331
332 /*
333 * dnlc_dir_rem_entry() removes an entry form a directory cache.
334 * Returns the handle if "handlep" non null.
335 * Returns DFOUND, DNOENT, DNOCACHE.
336 */
337 dcret_t dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name,
338 uint64_t *handlep);
339
340 /*
341 * dnlc_dir_rem_space_by_len() looks up and returns free space in a
342 * directory cache of at least the given "len". Returns in "handlep"
343 * the handle supplied when adding the free space in dnlc_dir_add_space().
344 * Returns DFOUND, DNOENT, DNOCACHE.
345 */
346 dcret_t dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len,
347 uint64_t *handlep);
348
349 /*
350 * dnlc_dir_rem_space_by_handle() looks up and removes the free space in
351 * a directory cache with the given handle. Returns DFOUND, DNOENT, DNOCACHE.
352 */
353 dcret_t dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle);
354
355 /*
356 * dnlc_dir_init() initialises a directory anchor
357 */
358 #define dnlc_dir_init(dcap) { \
359 (dcap)->dca_dircache = NULL; \
360 mutex_init(&(dcap)->dca_lock, NULL, MUTEX_DEFAULT, NULL); }
361
362 /*
363 * dnlc_dir_fini() is called to indicate the anchor is no longer used.
364 * It ensures there's no directory cache and mutex_destroys the lock
365 */
366 void dnlc_dir_fini(dcanchor_t *dcap);
367
368 #endif /* defined(_KERNEL) */
369
370 #ifdef __cplusplus
371 }
372 #endif
373
374 #endif /* _SYS_DNLC_H */