1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/systm.h>
26 #include <sys/cmn_err.h>
27 #include <sys/kmem.h>
28 #include <sys/disp.h>
29 #include <sys/id_space.h>
30 #include <sys/atomic.h>
31 #include <rpc/rpc.h>
32 #include <nfs/nfs4.h>
33 #include <nfs/nfs4_db_impl.h>
34 #include <sys/sdt.h>
35
36 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
37
38 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
39 static void rfs4_dbe_destroy(rfs4_dbe_t *);
40 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
41 static void rfs4_start_reaper(rfs4_table_t *);
42
/*
 * Reaper tunables.  Each line gives the meaning of the value followed by
 * where it may be set from:
 *
 * t_lowat - integer percentage of table entries; /etc/system only
 * t_hiwat - integer percentage of table entries; /etc/system only
 * t_lreap - integer percentage of table reap time; mdb or /etc/system
 * t_hreap - integer percentage of table reap time; mdb or /etc/system
 */
49 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
50 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
51 time_t t_lreap = 50; /* default to 50% of table's reap interval */
52 time_t t_hreap = 10; /* default to 10% of table's reap interval */
53
54 id_t
55 rfs4_dbe_getid(rfs4_dbe_t *entry)
56 {
57 return (entry->dbe_id);
58 }
59
60 void
61 rfs4_dbe_hold(rfs4_dbe_t *entry)
62 {
63 atomic_inc_32(&entry->dbe_refcnt);
64 }
65
66 /*
67 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
68 */
69 void
70 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
71 {
72 atomic_dec_32(&entry->dbe_refcnt);
73 }
74
75
76 uint32_t
77 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
78 {
79 return (entry->dbe_refcnt);
80 }
81
82 /*
83 * Mark an entry such that the dbsearch will skip it.
84 * Caller does not want this entry to be found any longer
85 */
86 void
87 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
88 {
89 entry->dbe_invalid = TRUE;
90 entry->dbe_skipsearch = TRUE;
91 }
92
93 /*
94 * Is this entry invalid?
95 */
96 bool_t
97 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
98 {
99 return (entry->dbe_invalid);
100 }
101
102 time_t
103 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
104 {
105 return (entry->dbe_time_rele);
106 }
107
108 /*
109 * Use these to temporarily hide/unhide a db entry.
110 */
112 rfs4_dbe_hide(rfs4_dbe_t *entry)
113 {
114 rfs4_dbe_lock(entry);
115 entry->dbe_skipsearch = TRUE;
116 rfs4_dbe_unlock(entry);
117 }
118
119 void
120 rfs4_dbe_unhide(rfs4_dbe_t *entry)
121 {
122 rfs4_dbe_lock(entry);
123 entry->dbe_skipsearch = FALSE;
124 rfs4_dbe_unlock(entry);
125 }
126
127 void
128 rfs4_dbe_rele(rfs4_dbe_t *entry)
129 {
130 mutex_enter(entry->dbe_lock);
131 ASSERT(entry->dbe_refcnt > 1);
132 atomic_dec_32(&entry->dbe_refcnt);
133 entry->dbe_time_rele = gethrestime_sec();
134 mutex_exit(entry->dbe_lock);
135 }
136
137 void
138 rfs4_dbe_lock(rfs4_dbe_t *entry)
139 {
140 mutex_enter(entry->dbe_lock);
141 }
142
143 void
144 rfs4_dbe_unlock(rfs4_dbe_t *entry)
145 {
146 mutex_exit(entry->dbe_lock);
147 }
148
149 bool_t
150 rfs4_dbe_islocked(rfs4_dbe_t *entry)
151 {
152 return (mutex_owned(entry->dbe_lock));
232 /*
233 * Given a database that has been "shutdown" by the function above all
234 * of the table tables are destroyed and then the database itself
235 * freed.
236 */
237 void
238 rfs4_database_destroy(rfs4_database_t *db)
239 {
240 rfs4_table_t *next, *tmp;
241
242 for (next = db->db_tables; next; ) {
243 tmp = next;
244 next = tmp->dbt_tnext;
245 rfs4_table_destroy(db, tmp);
246 }
247
248 mutex_destroy(db->db_lock);
249 kmem_free(db, sizeof (rfs4_database_t));
250 }
251
252 rfs4_table_t *
253 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
254 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
255 void (*destroy)(rfs4_entry_t),
256 bool_t (*expiry)(rfs4_entry_t),
257 uint32_t size, uint32_t hashsize,
258 uint32_t maxentries, id_t start)
259 {
260 rfs4_table_t *table;
261 int len;
262 char *cache_name;
263 char *id_name;
264
265 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
266 table->dbt_db = db;
267 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
268 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
269 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
270 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
271
287
288 if (start >= 0) {
289 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
290 maxentries = INT32_MAX - start;
291 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
292 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
293 table->dbt_id_space = id_space_create(id_name, start,
294 maxentries + start);
295 kmem_free(id_name, len + 10);
296 }
297 ASSERT(t_lowat != 0);
298 table->dbt_id_lwat = (maxentries * t_lowat) / 100;
299 ASSERT(t_hiwat != 0);
300 table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
301 table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
302 table->dbt_maxentries = maxentries;
303 table->dbt_create = create;
304 table->dbt_destroy = destroy;
305 table->dbt_expiry = expiry;
306
307 table->dbt_mem_cache = kmem_cache_create(cache_name,
308 sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
309 0,
310 rfs4_dbe_kmem_constructor,
311 rfs4_dbe_kmem_destructor,
312 NULL,
313 table,
314 NULL,
315 0);
316 kmem_free(cache_name, len+13);
317
318 table->dbt_debug = db->db_debug_flags;
319
320 mutex_enter(db->db_lock);
321 table->dbt_tnext = db->db_tables;
322 db->db_tables = table;
323 mutex_exit(db->db_lock);
324
325 rfs4_start_reaper(table);
326
327 return (table);
328 }
329
330 void
331 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
332 {
333 rfs4_table_t *p;
334 rfs4_index_t *idx;
335
347 }
348 ASSERT(p != NULL);
349 }
350 mutex_exit(db->db_lock);
351
352 /* Destroy indices */
353 while (table->dbt_indices) {
354 idx = table->dbt_indices;
355 table->dbt_indices = idx->dbi_inext;
356 rfs4_index_destroy(idx);
357 }
358
359 rw_destroy(table->dbt_t_lock);
360 mutex_destroy(table->dbt_lock);
361 mutex_destroy(&table->dbt_reaper_cv_lock);
362 cv_destroy(&table->dbt_reaper_wait);
363
364 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
365 if (table->dbt_id_space)
366 id_space_destroy(table->dbt_id_space);
367 kmem_cache_destroy(table->dbt_mem_cache);
368 kmem_free(table, sizeof (rfs4_table_t));
369 }
370
371 rfs4_index_t *
372 rfs4_index_create(rfs4_table_t *table, char *keyname,
373 uint32_t (*hash)(void *),
374 bool_t (compare)(rfs4_entry_t, void *),
375 void *(*mkkey)(rfs4_entry_t),
376 bool_t createable)
377 {
378 rfs4_index_t *idx;
379
380 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
381
382 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
383
384 idx->dbi_table = table;
385 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
386 (void) strcpy(idx->dbi_keyname, keyname);
387 idx->dbi_hash = hash;
666 continue;
667 l = &entry->dbe_indices[ip->dbi_tblidx];
668 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
669 ASSERT(i < ip->dbi_table->dbt_len);
670 bp = &ip->dbi_buckets[i];
671 ENQUEUE_IDX(bp, l);
672 }
673
674 NFS4_DEBUG(
675 table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
676 (CE_NOTE, "Entry %p created for %s = %p in table %s",
677 (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
678
679 return (entry->dbe_data);
680 }
681
682 /*ARGSUSED*/
683 boolean_t
684 rfs4_cpr_callb(void *arg, int code)
685 {
686 rfs4_table_t *table = rfs4_client_tab;
687 rfs4_bucket_t *buckets, *bp;
688 rfs4_link_t *l;
689 rfs4_client_t *cp;
690 int i;
691
692 /*
693 * We get called for Suspend and Resume events.
694 * For the suspend case we simply don't care! Nor do we care if
695 * there are no clients.
696 */
697 if (code == CB_CODE_CPR_CHKPT || table == NULL) {
698 return (B_TRUE);
699 }
700
701 buckets = table->dbt_indices->dbi_buckets;
702
703 /*
704 * When we get this far we are in the process of
705 * resuming the system from a previous suspend.
706 *
707 * We are going to blast through and update the
708 * last_access time for all the clients and in
709 * doing so extend them by one lease period.
710 */
711 for (i = 0; i < table->dbt_len; i++) {
758 }
759
760
761 static void
762 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
763 {
764 rfs4_index_t *idx = table->dbt_indices;
765 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
766 rfs4_link_t *l, *t;
767 rfs4_dbe_t *entry;
768 bool_t found;
769 int i;
770 int count = 0;
771
772 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
773 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
774 desired, cache_time, table->dbt_name));
775
776 /* Walk the buckets looking for entries to release/destroy */
777 for (i = 0; i < table->dbt_len; i++) {
778 bp = &buckets[i];
779 do {
780 found = FALSE;
781 rw_enter(bp->dbk_lock, RW_READER);
782 for (l = bp->dbk_head; l; l = l->next) {
783 entry = l->entry;
784 /*
785 * Examine an entry. Ref count of 1 means
786 * that the only reference is for the hash
787 * table reference.
788 */
789 if (entry->dbe_refcnt != 1)
790 continue;
791 mutex_enter(entry->dbe_lock);
792 if ((entry->dbe_refcnt == 1) &&
793 (table->dbt_reaper_shutdown ||
794 table->dbt_expiry == NULL ||
795 (*table->dbt_expiry)(entry->dbe_data))) {
796 entry->dbe_refcnt--;
797 count++;
798 found = TRUE;
799 }
800 mutex_exit(entry->dbe_lock);
801 }
802 if (found) {
803 if (!rw_tryupgrade(bp->dbk_lock)) {
804 rw_exit(bp->dbk_lock);
805 rw_enter(bp->dbk_lock, RW_WRITER);
806 }
807
808 l = bp->dbk_head;
809 while (l) {
810 t = l;
811 entry = t->entry;
812 l = l->next;
813 if (entry->dbe_refcnt == 0) {
814 DEQUEUE(bp->dbk_head, t);
815 t->next = NULL;
816 t->prev = NULL;
817 INVALIDATE_ADDR(t->entry);
818 rfs4_dbe_destroy(entry);
819 }
820 }
821 }
822 rw_exit(bp->dbk_lock);
823 /*
824 * delay slightly if there is more work to do
825 * with the expectation that other reaper
826 * threads are freeing data structures as well
827 * and in turn will reduce ref counts on
828 * entries in this table allowing them to be
829 * released. This is only done in the
830 * instance that the tables are being shut down.
831 */
832 if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
833 delay(hz/100);
834 /*
835 * If this is a table shutdown, keep going until
836 * everything is gone
837 */
838 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
839
840 if (!table->dbt_reaper_shutdown && desired && count >= desired)
841 break;
842 }
843
844 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
845 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
846 count, cache_time, table->dbt_name));
847 }
848
849 static void
850 reaper_thread(caddr_t *arg)
851 {
852 rfs4_table_t *table = (rfs4_table_t *)arg;
853 clock_t rc;
854
855 NFS4_DEBUG(table->dbt_debug,
856 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
857
858 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
862 do {
863 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
864 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
865 &table->dbt_reaper_cv_lock,
866 SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
867 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
868 &table->dbt_reaper_cv_lock);
869 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
870 } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
871
872 CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
873
874 NFS4_DEBUG(table->dbt_debug,
875 (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
876
877 /* Notify the database shutdown processing that the table is shutdown */
878 mutex_enter(table->dbt_db->db_lock);
879 table->dbt_db->db_shutdown_count--;
880 cv_signal(&table->dbt_db->db_shutdown_wait);
881 mutex_exit(table->dbt_db->db_lock);
882 }
883
884 static void
885 rfs4_start_reaper(rfs4_table_t *table)
886 {
887 if (table->dbt_max_cache_time == 0)
888 return;
889
890 (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
891 minclsyspri);
892 }
893
#ifdef DEBUG
/*
 * DEBUG-only helper: log the entry's address, the name of the table it
 * belongs to, its reference count and its id via cmn_err().
 */
void
rfs4_dbe_debug(rfs4_dbe_t *entry)
{
	rfs4_table_t *owner = entry->dbe_table;

	cmn_err(CE_NOTE, "Entry %p from table %s", (void *)entry,
	    owner->dbt_name);
	cmn_err(CE_CONT, "\trefcnt = %d id = %d", entry->dbe_refcnt,
	    entry->dbe_id);
}
#endif
|
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2019 Nexenta Systems, Inc.
28 */
29
30 #include <sys/systm.h>
31 #include <sys/cmn_err.h>
32 #include <sys/kmem.h>
33 #include <sys/disp.h>
34 #include <sys/id_space.h>
35 #include <rpc/rpc.h>
36 #include <nfs/nfs4.h>
37 #include <nfs/nfs4_db_impl.h>
38 #include <sys/sdt.h>
39
40 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
41
42 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
43 static void rfs4_dbe_destroy(rfs4_dbe_t *);
44 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
45 static void rfs4_start_reaper(rfs4_table_t *);
46
/*
 * Reaper tunables.  Each line gives the meaning of the value followed by
 * where it may be set from:
 *
 * t_lowat - integer percentage of table entries; /etc/system only
 * t_hiwat - integer percentage of table entries; /etc/system only
 * t_lreap - integer percentage of table reap time; mdb or /etc/system
 * t_hreap - integer percentage of table reap time; mdb or /etc/system
 */
53 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
54 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
55 time_t t_lreap = 50; /* default to 50% of table's reap interval */
56 time_t t_hreap = 10; /* default to 10% of table's reap interval */
57
58 id_t
59 rfs4_dbe_getid(rfs4_dbe_t *entry)
60 {
61 return (entry->dbe_id);
62 }
63
64 void
65 rfs4_dbe_hold(rfs4_dbe_t *entry)
66 {
67 if (!MUTEX_HELD(entry->dbe_lock)) {
68 mutex_enter(entry->dbe_lock);
69 entry->dbe_refcnt++;
70 mutex_exit(entry->dbe_lock);
71 } else {
72 entry->dbe_refcnt++;
73 }
74 }
75
76 /*
77 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
78 */
79 void
80 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
81 {
82 if (!MUTEX_HELD(entry->dbe_lock)) {
83 ASSERT(entry->dbe_refcnt > 0);
84 mutex_enter(entry->dbe_lock);
85 entry->dbe_refcnt--;
86 mutex_exit(entry->dbe_lock);
87 } else {
88 entry->dbe_refcnt--;
89 }
90 }
91
92
93 uint32_t
94 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
95 {
96 return (entry->dbe_refcnt);
97 }
98
99 /*
100 * Mark an entry such that the dbsearch will skip it.
101 * Caller does not want this entry to be found any longer
102 */
103 void
104 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
105 {
106 if (!MUTEX_HELD(entry->dbe_lock)) {
107 mutex_enter(entry->dbe_lock);
108 entry->dbe_invalid = TRUE;
109 entry->dbe_skipsearch = TRUE;
110 mutex_exit(entry->dbe_lock);
111 } else {
112 entry->dbe_invalid = TRUE;
113 entry->dbe_skipsearch = TRUE;
114 }
115 }
116
117 /*
118 * Is this entry invalid?
119 */
120 bool_t
121 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
122 {
123 return (entry->dbe_invalid);
124 }
125
126 time_t
127 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
128 {
129 return (entry->dbe_time_rele);
130 }
131
132 /*
133 * Use these to temporarily hide/unhide a db entry.
134 */
136 rfs4_dbe_hide(rfs4_dbe_t *entry)
137 {
138 rfs4_dbe_lock(entry);
139 entry->dbe_skipsearch = TRUE;
140 rfs4_dbe_unlock(entry);
141 }
142
143 void
144 rfs4_dbe_unhide(rfs4_dbe_t *entry)
145 {
146 rfs4_dbe_lock(entry);
147 entry->dbe_skipsearch = FALSE;
148 rfs4_dbe_unlock(entry);
149 }
150
151 void
152 rfs4_dbe_rele(rfs4_dbe_t *entry)
153 {
154 mutex_enter(entry->dbe_lock);
155 ASSERT(entry->dbe_refcnt > 1);
156 entry->dbe_refcnt--;
157 entry->dbe_time_rele = gethrestime_sec();
158 mutex_exit(entry->dbe_lock);
159 }
160
161 void
162 rfs4_dbe_lock(rfs4_dbe_t *entry)
163 {
164 mutex_enter(entry->dbe_lock);
165 }
166
167 void
168 rfs4_dbe_unlock(rfs4_dbe_t *entry)
169 {
170 mutex_exit(entry->dbe_lock);
171 }
172
173 bool_t
174 rfs4_dbe_islocked(rfs4_dbe_t *entry)
175 {
176 return (mutex_owned(entry->dbe_lock));
256 /*
257 * Given a database that has been "shutdown" by the function above all
258 * of the table tables are destroyed and then the database itself
259 * freed.
260 */
261 void
262 rfs4_database_destroy(rfs4_database_t *db)
263 {
264 rfs4_table_t *next, *tmp;
265
266 for (next = db->db_tables; next; ) {
267 tmp = next;
268 next = tmp->dbt_tnext;
269 rfs4_table_destroy(db, tmp);
270 }
271
272 mutex_destroy(db->db_lock);
273 kmem_free(db, sizeof (rfs4_database_t));
274 }
275
276 /*
277 * Used to get the correct kmem_cache database for the state table being
278 * created.
279 * Helper function for rfs4_table_create
280 */
281 static kmem_cache_t *
282 get_db_mem_cache(char *name)
283 {
284 int i;
285
286 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
287 if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
288 return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
289 }
290 /*
291 * There is no associated kmem cache for this NFS4 server state
292 * table name
293 */
294 return (NULL);
295 }
296
297 /*
298 * Used to initialize the global NFSv4 server state database.
299 * Helper funtion for rfs4_state_g_init and called when module is loaded.
300 */
301 kmem_cache_t *
302 /* CSTYLED */
303 nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
304 {
305 kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
306 sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
307 0,
308 rfs4_dbe_kmem_constructor,
309 rfs4_dbe_kmem_destructor,
310 NULL,
311 NULL,
312 NULL,
313 0);
314 (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
315 strlen(cache_name) + 1);
316 rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
317 return (mem_cache);
318 }
319
320 rfs4_table_t *
321 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
322 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
323 void (*destroy)(rfs4_entry_t),
324 bool_t (*expiry)(rfs4_entry_t),
325 uint32_t size, uint32_t hashsize,
326 uint32_t maxentries, id_t start)
327 {
328 rfs4_table_t *table;
329 int len;
330 char *cache_name;
331 char *id_name;
332
333 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
334 table->dbt_db = db;
335 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
336 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
337 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
338 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
339
355
356 if (start >= 0) {
357 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
358 maxentries = INT32_MAX - start;
359 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
360 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
361 table->dbt_id_space = id_space_create(id_name, start,
362 maxentries + start);
363 kmem_free(id_name, len + 10);
364 }
365 ASSERT(t_lowat != 0);
366 table->dbt_id_lwat = (maxentries * t_lowat) / 100;
367 ASSERT(t_hiwat != 0);
368 table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
369 table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
370 table->dbt_maxentries = maxentries;
371 table->dbt_create = create;
372 table->dbt_destroy = destroy;
373 table->dbt_expiry = expiry;
374
375 /*
376 * get the correct kmem_cache for this table type based on the name.
377 */
378 table->dbt_mem_cache = get_db_mem_cache(cache_name);
379
380 kmem_free(cache_name, len+13);
381
382 table->dbt_debug = db->db_debug_flags;
383
384 mutex_enter(db->db_lock);
385 table->dbt_tnext = db->db_tables;
386 db->db_tables = table;
387 mutex_exit(db->db_lock);
388
389 rfs4_start_reaper(table);
390
391 return (table);
392 }
393
394 void
395 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
396 {
397 rfs4_table_t *p;
398 rfs4_index_t *idx;
399
411 }
412 ASSERT(p != NULL);
413 }
414 mutex_exit(db->db_lock);
415
416 /* Destroy indices */
417 while (table->dbt_indices) {
418 idx = table->dbt_indices;
419 table->dbt_indices = idx->dbi_inext;
420 rfs4_index_destroy(idx);
421 }
422
423 rw_destroy(table->dbt_t_lock);
424 mutex_destroy(table->dbt_lock);
425 mutex_destroy(&table->dbt_reaper_cv_lock);
426 cv_destroy(&table->dbt_reaper_wait);
427
428 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
429 if (table->dbt_id_space)
430 id_space_destroy(table->dbt_id_space);
431 table->dbt_mem_cache = NULL;
432 kmem_free(table, sizeof (rfs4_table_t));
433 }
434
435 rfs4_index_t *
436 rfs4_index_create(rfs4_table_t *table, char *keyname,
437 uint32_t (*hash)(void *),
438 bool_t (compare)(rfs4_entry_t, void *),
439 void *(*mkkey)(rfs4_entry_t),
440 bool_t createable)
441 {
442 rfs4_index_t *idx;
443
444 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
445
446 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
447
448 idx->dbi_table = table;
449 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
450 (void) strcpy(idx->dbi_keyname, keyname);
451 idx->dbi_hash = hash;
730 continue;
731 l = &entry->dbe_indices[ip->dbi_tblidx];
732 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
733 ASSERT(i < ip->dbi_table->dbt_len);
734 bp = &ip->dbi_buckets[i];
735 ENQUEUE_IDX(bp, l);
736 }
737
738 NFS4_DEBUG(
739 table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
740 (CE_NOTE, "Entry %p created for %s = %p in table %s",
741 (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
742
743 return (entry->dbe_data);
744 }
745
746 /*ARGSUSED*/
747 boolean_t
748 rfs4_cpr_callb(void *arg, int code)
749 {
750 rfs4_bucket_t *buckets, *bp;
751 rfs4_link_t *l;
752 rfs4_client_t *cp;
753 int i;
754
755 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
756 rfs4_table_t *table = nsrv4->rfs4_client_tab;
757
758 /*
759 * We get called for Suspend and Resume events.
760 * For the suspend case we simply don't care! Nor do we care if
761 * there are no clients.
762 */
763 if (code == CB_CODE_CPR_CHKPT || table == NULL) {
764 return (B_TRUE);
765 }
766
767 buckets = table->dbt_indices->dbi_buckets;
768
769 /*
770 * When we get this far we are in the process of
771 * resuming the system from a previous suspend.
772 *
773 * We are going to blast through and update the
774 * last_access time for all the clients and in
775 * doing so extend them by one lease period.
776 */
777 for (i = 0; i < table->dbt_len; i++) {
824 }
825
826
827 static void
828 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
829 {
830 rfs4_index_t *idx = table->dbt_indices;
831 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
832 rfs4_link_t *l, *t;
833 rfs4_dbe_t *entry;
834 bool_t found;
835 int i;
836 int count = 0;
837
838 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
839 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
840 desired, cache_time, table->dbt_name));
841
842 /* Walk the buckets looking for entries to release/destroy */
843 for (i = 0; i < table->dbt_len; i++) {
844 int retries = 0;
845 bp = &buckets[i];
846 do {
847 found = FALSE;
848 rw_enter(bp->dbk_lock, RW_READER);
849 for (l = bp->dbk_head; l; l = l->next) {
850 entry = l->entry;
851 mutex_enter(entry->dbe_lock);
852 ASSERT(entry->dbe_refcnt != 0);
853 /*
854 * Examine an entry. Ref count of 1 means
855 * that the only reference is for the hash
856 * table reference.
857 */
858 if (entry->dbe_refcnt != 1) {
859 #ifdef DEBUG
860 rfs4_dbe_debug(entry);
861 #endif
862 mutex_exit(entry->dbe_lock);
863 continue;
864 }
865 if ((entry->dbe_refcnt == 1) &&
866 (table->dbt_reaper_shutdown ||
867 table->dbt_expiry == NULL ||
868 (*table->dbt_expiry)(entry->dbe_data))) {
869 rfs4_dbe_rele_nolock(entry);
870 count++;
871 found = TRUE;
872 }
873 mutex_exit(entry->dbe_lock);
874 }
875 if (found) {
876 if (!rw_tryupgrade(bp->dbk_lock)) {
877 rw_exit(bp->dbk_lock);
878 rw_enter(bp->dbk_lock, RW_WRITER);
879 }
880
881 l = bp->dbk_head;
882 while (l) {
883 t = l;
884 entry = t->entry;
885 l = l->next;
886 mutex_enter(entry->dbe_lock);
887 if (entry->dbe_refcnt == 0) {
888 DEQUEUE(bp->dbk_head, t);
889 mutex_exit(entry->dbe_lock);
890 t->next = NULL;
891 t->prev = NULL;
892 INVALIDATE_ADDR(t->entry);
893 rfs4_dbe_destroy(entry);
894 } else
895 mutex_exit(entry->dbe_lock);
896 }
897 }
898 rw_exit(bp->dbk_lock);
899 /*
900 * delay slightly if there is more work to do
901 * with the expectation that other reaper
902 * threads are freeing data structures as well
903 * and in turn will reduce ref counts on
904 * entries in this table allowing them to be
905 * released. This is only done in the
906 * instance that the tables are being shut down.
907 */
908 if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
909 delay(hz/100);
910 retries++;
911 }
912 /*
913 * If this is a table shutdown, keep going until
914 * everything is gone
915 */
916 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
917
918 if (!table->dbt_reaper_shutdown && desired && count >= desired)
919 break;
920 }
921
922 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
923 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
924 count, cache_time, table->dbt_name));
925 }
926
927 static void
928 reaper_thread(caddr_t *arg)
929 {
930 rfs4_table_t *table = (rfs4_table_t *)arg;
931 clock_t rc;
932
933 NFS4_DEBUG(table->dbt_debug,
934 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
935
936 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
940 do {
941 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
942 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
943 &table->dbt_reaper_cv_lock,
944 SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
945 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
946 &table->dbt_reaper_cv_lock);
947 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
948 } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
949
950 CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
951
952 NFS4_DEBUG(table->dbt_debug,
953 (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
954
955 /* Notify the database shutdown processing that the table is shutdown */
956 mutex_enter(table->dbt_db->db_lock);
957 table->dbt_db->db_shutdown_count--;
958 cv_signal(&table->dbt_db->db_shutdown_wait);
959 mutex_exit(table->dbt_db->db_lock);
960 zthread_exit();
961 }
962
963 static void
964 rfs4_start_reaper(rfs4_table_t *table)
965 {
966 if (table->dbt_max_cache_time == 0)
967 return;
968
969 (void) zthread_create(NULL, 0, reaper_thread, table, 0,
970 minclsyspri);
971 }
972
#ifdef DEBUG
/*
 * DEBUG-only helper: log the entry's address, the name of the table it
 * belongs to, its reference count and its id via cmn_err().
 */
void
rfs4_dbe_debug(rfs4_dbe_t *entry)
{
	rfs4_table_t *owner = entry->dbe_table;

	cmn_err(CE_NOTE, "Entry %p from table %s", (void *)entry,
	    owner->dbt_name);
	cmn_err(CE_CONT, "\trefcnt = %d id = %d", entry->dbe_refcnt,
	    entry->dbe_id);
}
#endif
|