1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27 /*        All Rights Reserved   */
  28 
  29 /*
  30  * University Copyright- Copyright (c) 1982, 1986, 1988
  31  * The Regents of the University of California
  32  * All Rights Reserved
  33  *
  34  * University Acknowledgment- Portions of this document are derived from
  35  * software developed by the University of California, Berkeley, and its
  36  * contributors.
  37  */
  38 
  39 #ifndef _SYS_DNLC_H
  40 #define _SYS_DNLC_H
  41 
  42 #ifdef  __cplusplus
  43 extern "C" {
  44 #endif
  45 
  46 #include <sys/kstat.h>
  47 
  48 /*
  49  * DNLC - Directory name lookup cache.
  50  * There are now two sorts of name caching:
  51  *
  52  * Standard dnlc: This original cache holds recent mappings
  53  *                of <directory vnode, name> to vnode mappings.
  54  *
  55  * Directory caches: Entire large directories can be cached, subject to
  56  *                   memory availability and tunables. A directory cache
  57  *                   anchor point must be provided in the xxnode for
  58  *                   a directory.
  59  */
  60 
  61 
  62 /*
  63  * Standard dnlc
  64  * =============
  65  */
  66 
  67 /*
  68  * This structure describes the elements in the cache of recent
  69  * names looked up.
  70  *
  71  * Note namlen is a uchar_t to conserve space
  72  * and alignment padding. The max length of any
  73  * pathname component is defined as MAXNAMELEN
  74  * which is 256 (including the terminating null).
  75  * So provided this doesn't change, we don't include the null,
  76  * we always use bcmp to compare strings, and we don't start
  77  * storing full names, then we are ok. The space savings are worth it.
  78  */
  79 typedef struct ncache {
  80         struct ncache *hash_next;       /* hash chain, MUST BE FIRST */
  81         struct ncache *hash_prev;
  82         struct vnode *vp;               /* vnode the name refers to */
  83         struct vnode *dp;               /* vnode of parent of name */
  84         int hash;                       /* hash signature */
  85         uchar_t namlen;                 /* length of name */
  86         char name[1];                   /* segment name - null terminated */
  87 } ncache_t;
  88 
  89 /*
  90  * Hash table bucket structure of name cache entries for fast lookup.
  91  */
  92 typedef struct nc_hash  {
  93         ncache_t *hash_next;
  94         ncache_t *hash_prev;
  95         kmutex_t hash_lock;
  96 } nc_hash_t;
  97 
  98 /*
  99  * Statistics on name cache
 100  * Not protected by locks
 101  */
 102 /*
 103  * ncstats has been deprecated, due to the integer size of the counters
 104  * which can easily overflow in the dnlc.
 105  * It is maintained (at some expense) for compatability.
 106  * The preferred interface is the kstat accessible nc_stats below, ehich
 107  * is actually shared with directory caching.
 108  */
 109 struct ncstats {
 110         int     hits;           /* hits that we can really use */
 111         int     misses;         /* cache misses */
 112         int     enters;         /* number of enters done */
 113         int     dbl_enters;     /* number of enters tried when already cached */
 114         int     long_enter;     /* deprecated, no longer accounted */
 115         int     long_look;      /* deprecated, no longer accounted */
 116         int     move_to_front;  /* entry moved to front of hash chain */
 117         int     purges;         /* number of purges of cache */
 118 };
 119 
 120 struct nc_stats {
 121         kstat_named_t ncs_hits;         /* cache hits */
 122         kstat_named_t ncs_misses;       /* cache misses */
 123         kstat_named_t ncs_neg_hits;     /* negative cache hits */
 124         kstat_named_t ncs_enters;       /* enters */
 125         kstat_named_t ncs_dbl_enters;   /* enters when entry already cached */
 126         kstat_named_t ncs_purge_total;  /* total entries prurged */
 127         kstat_named_t ncs_purge_all;    /* dnlc_purge() calls */
 128         kstat_named_t ncs_purge_vp;     /* dnlc_purge_vp() calls */
 129         kstat_named_t ncs_purge_vfs;    /* dnlc_purge_vfs() calls */
 130         kstat_named_t ncs_purge_fs1;    /* dnlc_purge_fs1() calls */
 131         kstat_named_t ncs_pick_free;    /* found a free ncache */
 132         kstat_named_t ncs_pick_heur;    /* found ncache w/ NULL vpages */
 133         kstat_named_t ncs_pick_last;    /* found last ncache on chain */
 134 
 135         /* directory caching stats */
 136 
 137         kstat_named_t ncs_dir_hits;     /* dir cache hits */
 138         kstat_named_t ncs_dir_misses;   /* dir cache misses */
 139         kstat_named_t ncs_cur_dirs;     /* current # directories cached */
 140         kstat_named_t ncs_dir_num_ents; /* current # entries cached */
 141         kstat_named_t ncs_dirs_cached;  /* total # directories cached */
 142         kstat_named_t ncs_dir_start_nm; /* dir start no memory */
 143         kstat_named_t ncs_dir_add_nm;   /* add entry/space - no memory */
 144         kstat_named_t ncs_dir_addabort; /* add entry/space - abort */
 145         kstat_named_t ncs_dir_add_max;  /* add entry/space - max exceeded */
 146         kstat_named_t ncs_dir_reme_fai; /* remove entry fail */
 147         kstat_named_t ncs_dir_rems_fai; /* remove space fail */
 148         kstat_named_t ncs_dir_upd_fail; /* update space fail */
 149         kstat_named_t ncs_dir_finipurg; /* fini purges */
 150         kstat_named_t ncs_dir_rec_last; /* reclaim last */
 151         kstat_named_t ncs_dir_recl_any; /* reclaim any */
 152 };
 153 
 154 /*
 155  * The dnlc hashing function.
 156  * Although really a kernel macro we export it to allow validation
 157  * of ncache_t entries by mdb. Note, mdb can handle the ASSERT.
 158  *
 159  * 'hash' and 'namlen' must be l-values. A check is made to ensure
 160  * the name length fits into an unsigned char (see ncache_t).
 161  */
 162 #define DNLCHASH(name, dvp, hash, namlen)                       \
 163         {                                                       \
 164                 char Xc;                                        \
 165                 const char *Xcp;                                \
 166                 hash = (int)((uintptr_t)(dvp)) >> 8;              \
 167                 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)     \
 168                         (hash) = ((hash) << 4) + (hash) + Xc;     \
 169                 ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));   \
 170                 (namlen) = Xcp - (name);                        \
 171         }
 172 
 173 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
 174 
 175 #include <sys/vfs.h>
 176 #include <sys/vnode.h>
 177 
 178 extern volatile int ncsize;     /* set in param_init() # of dnlc entries */
 179 extern vnode_t negative_cache_vnode;
 180 #define DNLC_NO_VNODE &negative_cache_vnode
 181 
 182 void    dnlc_init(void);
 183 void    dnlc_enter(vnode_t *, const char *, vnode_t *);
 184 void    dnlc_update(vnode_t *, const char *, vnode_t *);
 185 vnode_t *dnlc_lookup(vnode_t *, const char *);
 186 void    dnlc_purge(void);
 187 void    dnlc_purge_vp(vnode_t *);
 188 int     dnlc_purge_vfsp(vfs_t *, int);
 189 void    dnlc_remove(vnode_t *, const char *);
 190 int     dnlc_fs_purge1(struct vnodeops *);
 191 void    dnlc_reduce_cache(void *);
 192 
 193 #endif  /* defined(_KERNEL) */
 194 
 195 
 196 /*
 197  * Directory caching interfaces
 198  * ============================
 199  */
 200 
 201 /*
 202  * Typically for large directories, the file names will be the same or
 203  * at least similar lengths. So there's no point in anything more elaborate
 204  * than a simple unordered linked list of free space entries.
 205  * For small directories the name length distribution doesn't really matter.
 206  */
 207 typedef struct dcfree {
 208         uint64_t df_handle;             /* fs supplied handle */
 209         struct dcfree *df_next;         /* link to next free entry in bucket */
 210         uint_t df_len;                  /* length of free entry */
 211 } dcfree_t;
 212 
 213 typedef struct dcentry {
 214         uint64_t de_handle;             /* fs supplied and returned data */
 215         struct dcentry *de_next;        /* link to next name entry in bucket */
 216         int de_hash;                    /* hash signature */
 217         uchar_t de_namelen;             /* length of name excluding null */
 218         char de_name[1];                /* null terminated name */
 219 } dcentry_t;
 220 
 221 typedef struct dircache {
 222         struct dircache *dc_next;       /* chain - for purge purposes */
 223         struct dircache *dc_prev;       /* chain - for purge purposes */
 224         int64_t dc_actime;              /* dir access time, from lbolt64 */
 225         dcentry_t **dc_namehash;        /* entry hash table pointer */
 226         dcfree_t **dc_freehash;         /* free entry hash table pointer */
 227         uint_t dc_num_entries;          /* no of named entries */
 228         uint_t dc_num_free;             /* no of free space entries */
 229         uint_t dc_nhash_mask;           /* name hash table size - 1 */
 230         uint_t dc_fhash_mask;           /* free space hash table size - 1 */
 231         struct dcanchor *dc_anchor;     /* back ptr to anchor */
 232         boolean_t dc_complete;          /* cache complete boolean */
 233 } dircache_t;
 234 
 235 typedef struct dcanchor {
 236         void *dca_dircache;     /* pointer to directory cache */
 237         kmutex_t dca_lock;              /* protects the directory cache */
 238 } dcanchor_t;
 239 
 240 /*
 241  * Head struct for doubly linked chain of dircache_t
 242  * The next and prev fields must match those of a dircache_t
 243  */
 244 typedef struct {
 245         dircache_t *dch_next;           /* next in chain */
 246         dircache_t *dch_prev;           /* prev in chain */
 247         kmutex_t dch_lock;              /* lock for the chain */
 248 } dchead_t;
 249 
 250 
 251 #if defined(_KERNEL)
 252 
 253 /*
 254  * Status returns from the directory cache interfaces
 255  */
 256 typedef enum {
 257         DOK,            /* operation sucessful */
 258         DNOCACHE,       /* there is no cache */
 259         DFOUND,         /* entry found */
 260         DNOENT,         /* no entry found */
 261         DTOOBIG,        /* exceeds tunable dnlc_max_dir_cache */
 262         DNOMEM          /* no memory */
 263 } dcret_t;
 264 
 265 /*
 266  * dnlc_dir_start() requests that a directory be cached.
 267  * This must be called initially to enable caching on a directory.
 268  * After a successful call, directory entries and free space can be
 269  * added (see below) until the directory is marked complete.
 270  * "num_entries" is an estimate of the current number of
 271  * directory entries. The request is rejected with DNOCACHE
 272  * if num_entries falls below the tunable dnlc_dir_min_size (see
 273  * below), and rejected with DTOOBIG if it's above dnlc_dir_max_size.
 274  * Returns DOK, DNOCACHE, DTOOBIG, DNOMEM.
 275  *
 276  * Due to memory shortages, directory caches can be purged at
 277  * any time. If the last directory cache is purged due to memory
 278  * shortage, then the directory cache is marked internally
 279  * as "no memory". Future returns will all be DNOCACHE until
 280  * the next dnlc_start_dir() which will return DNOMEM once.
 281  * This memory shortage may only be transient. It's up to the
 282  * file system as to what to do about this condition, but an
 283  * attempt to immediately re-build the cache will very likely
 284  * lead to the same shortage of memory and a thrashing situation.
 285  *
 286  * It's file system policy as to when and what size directories to cache.
 287  */
 288 dcret_t dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries);
 289 
 290 /*
 291  * dnlc_dir_add_entry() adds an entry (name and handle) into a
 292  * partial or complete cache. "handle" is a file system specific
 293  * quantity that is returned on calls to dnlc_dir_lookup() - see below.
 294  * For example, "handle" for ufs holds the inumber and a directory
 295  * entry offset. Returns DOK, DNOCACHE, DTOOBIG.
 296  */
 297 dcret_t dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle);
 298 
 299 /*
 300  * dnlc_dir_add_space adds free space (length and file system specific
 301  * handle) into a partial or complete cache. "handle" is a file
 302  * system specific quantity that is returned on calls to
 303  * dnlc_dir_rem_space_by_len(). For example, "handle" for ufs holds
 304  * the directory entry offset.  Returns DOK, DNOCACHE, DTOOBIG.
 305  */
 306 dcret_t dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle);
 307 
 308 /*
 309  * dnlc_dir_complete() indicates the previously partial cache is now complete.
 310  */
 311 void dnlc_dir_complete(dcanchor_t *dcap);
 312 
 313 /*
 314  * dnlc_dir_purge() deletes a partial or complete directory cache
 315  */
 316 void dnlc_dir_purge(dcanchor_t *dcap);
 317 
 318 /*
 319  * dnlc_dir_lookup() lookups a file name in a directory cache
 320  * and returns the file system handle specified on dnlc_dir_add_entry()
 321  * in "handlep". Returns DFOUND, DNOENT, DNOCACHE.
 322  */
 323 dcret_t dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handlep);
 324 
 325 /*
 326  * dnlc_dir_update() amends the handle for an entry in a directory cache
 327  * "handle" is the new file system specific handle for the file "name".
 328  * Returns DFOUND, DNOENT, DNOCACHE.
 329  */
 330 dcret_t dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle);
 331 
 332 /*
 333  * dnlc_dir_rem_entry() removes an entry form a directory cache.
 334  * Returns the handle if "handlep" non null.
 335  * Returns DFOUND, DNOENT, DNOCACHE.
 336  */
 337 dcret_t dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name,
 338     uint64_t *handlep);
 339 
 340 /*
 341  * dnlc_dir_rem_space_by_len() looks up and returns free space in a
 342  * directory cache of at least the given "len". Returns in "handlep"
 343  * the handle supplied when adding the free space in dnlc_dir_add_space().
 344  * Returns DFOUND, DNOENT, DNOCACHE.
 345  */
 346 dcret_t dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len,
 347     uint64_t *handlep);
 348 
 349 /*
 350  * dnlc_dir_rem_space_by_handle() looks up and removes the free space in
 351  * a directory cache with the given handle. Returns DFOUND, DNOENT, DNOCACHE.
 352  */
 353 dcret_t dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle);
 354 
 355 /*
 356  * dnlc_dir_init() initialises a directory anchor
 357  */
 358 #define dnlc_dir_init(dcap) { \
 359         (dcap)->dca_dircache = NULL; \
 360         mutex_init(&(dcap)->dca_lock, NULL, MUTEX_DEFAULT, NULL); }
 361 
 362 /*
 363  * dnlc_dir_fini() is called to indicate the anchor is no longer used.
 364  * It ensures there's no directory cache and mutex_destroys the lock
 365  */
 366 void dnlc_dir_fini(dcanchor_t *dcap);
 367 
 368 #endif  /* defined(_KERNEL) */
 369 
 370 #ifdef  __cplusplus
 371 }
 372 #endif
 373 
 374 #endif  /* _SYS_DNLC_H */