7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2019 Nexenta Systems, Inc.
28 */
29
30 #include <sys/systm.h>
31 #include <sys/cmn_err.h>
32 #include <sys/kmem.h>
33 #include <sys/disp.h>
34 #include <sys/id_space.h>
35 #include <rpc/rpc.h>
36 #include <nfs/nfs4.h>
37 #include <nfs/nfs4_db_impl.h>
38 #include <sys/sdt.h>
39
40 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
41
42 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
43 static void rfs4_dbe_destroy(rfs4_dbe_t *);
44 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
45 static void rfs4_start_reaper(rfs4_table_t *);
46
47 /*
48 * t_lowat - integer percentage of table entries /etc/system only
49 * t_hiwat - integer percentage of table entries /etc/system only
50 * t_lreap - integer percentage of table reap time mdb or /etc/system
51 * t_hreap - integer percentage of table reap time mdb or /etc/system
52 */
53 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
54 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
55 time_t t_lreap = 50; /* default to 50% of table's reap interval */
56 time_t t_hreap = 10; /* default to 10% of table's reap interval */
57
58 id_t
59 rfs4_dbe_getid(rfs4_dbe_t *entry)
60 {
61 return (entry->dbe_id);
62 }
63
64 void
65 rfs4_dbe_hold(rfs4_dbe_t *entry)
66 {
67 if (!MUTEX_HELD(entry->dbe_lock)) {
68 mutex_enter(entry->dbe_lock);
69 entry->dbe_refcnt++;
70 mutex_exit(entry->dbe_lock);
71 } else {
72 entry->dbe_refcnt++;
73 }
74 }
75
76 /*
77 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
78 */
79 void
80 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
81 {
82 if (!MUTEX_HELD(entry->dbe_lock)) {
83 ASSERT(entry->dbe_refcnt > 0);
84 mutex_enter(entry->dbe_lock);
85 entry->dbe_refcnt--;
86 mutex_exit(entry->dbe_lock);
87 } else {
88 entry->dbe_refcnt--;
89 }
90 }
91
92
93 uint32_t
94 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
95 {
96 return (entry->dbe_refcnt);
97 }
98
99 /*
100 * Mark an entry such that the dbsearch will skip it.
101 * Caller does not want this entry to be found any longer
102 */
103 void
104 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
105 {
106 if (!MUTEX_HELD(entry->dbe_lock)) {
107 mutex_enter(entry->dbe_lock);
108 entry->dbe_invalid = TRUE;
109 entry->dbe_skipsearch = TRUE;
110 mutex_exit(entry->dbe_lock);
111 } else {
112 entry->dbe_invalid = TRUE;
113 entry->dbe_skipsearch = TRUE;
114 }
115 }
116
117 /*
118 * Is this entry invalid?
119 */
120 bool_t
121 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
122 {
123 return (entry->dbe_invalid);
124 }
125
126 time_t
127 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
128 {
129 return (entry->dbe_time_rele);
130 }
131
132 /*
133 * Use these to temporarily hide/unhide a db entry.
134 */
136 rfs4_dbe_hide(rfs4_dbe_t *entry)
137 {
138 rfs4_dbe_lock(entry);
139 entry->dbe_skipsearch = TRUE;
140 rfs4_dbe_unlock(entry);
141 }
142
143 void
144 rfs4_dbe_unhide(rfs4_dbe_t *entry)
145 {
146 rfs4_dbe_lock(entry);
147 entry->dbe_skipsearch = FALSE;
148 rfs4_dbe_unlock(entry);
149 }
150
151 void
152 rfs4_dbe_rele(rfs4_dbe_t *entry)
153 {
154 mutex_enter(entry->dbe_lock);
155 ASSERT(entry->dbe_refcnt > 1);
156 entry->dbe_refcnt--;
157 entry->dbe_time_rele = gethrestime_sec();
158 mutex_exit(entry->dbe_lock);
159 }
160
161 void
162 rfs4_dbe_lock(rfs4_dbe_t *entry)
163 {
164 mutex_enter(entry->dbe_lock);
165 }
166
167 void
168 rfs4_dbe_unlock(rfs4_dbe_t *entry)
169 {
170 mutex_exit(entry->dbe_lock);
171 }
172
173 bool_t
174 rfs4_dbe_islocked(rfs4_dbe_t *entry)
175 {
176 return (mutex_owned(entry->dbe_lock));
824 }
825
826
827 static void
828 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
829 {
830 rfs4_index_t *idx = table->dbt_indices;
831 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
832 rfs4_link_t *l, *t;
833 rfs4_dbe_t *entry;
834 bool_t found;
835 int i;
836 int count = 0;
837
838 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
839 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
840 desired, cache_time, table->dbt_name));
841
842 /* Walk the buckets looking for entries to release/destroy */
843 for (i = 0; i < table->dbt_len; i++) {
844 int retries = 0;
845 bp = &buckets[i];
846 do {
847 found = FALSE;
848 rw_enter(bp->dbk_lock, RW_READER);
849 for (l = bp->dbk_head; l; l = l->next) {
850 entry = l->entry;
851 mutex_enter(entry->dbe_lock);
852 ASSERT(entry->dbe_refcnt != 0);
853 /*
854 * Examine an entry. Ref count of 1 means
855 * that the only reference is for the hash
856 * table reference.
857 */
858 if (entry->dbe_refcnt != 1) {
859 mutex_exit(entry->dbe_lock);
860 continue;
861 }
862 if ((entry->dbe_refcnt == 1) &&
863 (table->dbt_reaper_shutdown ||
864 table->dbt_expiry == NULL ||
865 (*table->dbt_expiry)(entry->dbe_data))) {
866 rfs4_dbe_rele_nolock(entry);
867 count++;
868 found = TRUE;
869 }
870 mutex_exit(entry->dbe_lock);
871 }
872 if (found) {
873 if (!rw_tryupgrade(bp->dbk_lock)) {
874 rw_exit(bp->dbk_lock);
875 rw_enter(bp->dbk_lock, RW_WRITER);
876 }
877
878 l = bp->dbk_head;
879 while (l) {
880 t = l;
881 entry = t->entry;
882 l = l->next;
883 mutex_enter(entry->dbe_lock);
884 if (entry->dbe_refcnt == 0) {
885 DEQUEUE(bp->dbk_head, t);
886 mutex_exit(entry->dbe_lock);
887 t->next = NULL;
888 t->prev = NULL;
889 INVALIDATE_ADDR(t->entry);
890 rfs4_dbe_destroy(entry);
891 } else
892 mutex_exit(entry->dbe_lock);
893 }
894 }
895 rw_exit(bp->dbk_lock);
896 /*
897 * delay slightly if there is more work to do
898 * with the expectation that other reaper
899 * threads are freeing data structures as well
900 * and in turn will reduce ref counts on
901 * entries in this table allowing them to be
902 * released. This is only done in the
903 * instance that the tables are being shut down.
904 */
905 if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
906 delay(hz/100);
907 retries++;
908 }
909 /*
910 * If this is a table shutdown, keep going until
911 * everything is gone
912 */
913 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
914
915 if (!table->dbt_reaper_shutdown && desired && count >= desired)
916 break;
917 }
918
919 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
920 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
921 count, cache_time, table->dbt_name));
922 }
923
924 static void
925 reaper_thread(caddr_t *arg)
926 {
927 rfs4_table_t *table = (rfs4_table_t *)arg;
928 clock_t rc;
929
930 NFS4_DEBUG(table->dbt_debug,
931 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
932
933 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
|
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2018 Nexenta Systems, Inc.
28 */
29
30 #include <sys/systm.h>
31 #include <sys/cmn_err.h>
32 #include <sys/kmem.h>
33 #include <sys/disp.h>
34 #include <sys/id_space.h>
35 #include <sys/atomic.h>
36 #include <rpc/rpc.h>
37 #include <nfs/nfs4.h>
38 #include <nfs/nfs4_db_impl.h>
39 #include <sys/sdt.h>
40
41 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
42
43 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
44 static void rfs4_dbe_destroy(rfs4_dbe_t *);
45 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
46 static void rfs4_start_reaper(rfs4_table_t *);
47
48 /*
49 * t_lowat - integer percentage of table entries /etc/system only
50 * t_hiwat - integer percentage of table entries /etc/system only
51 * t_lreap - integer percentage of table reap time mdb or /etc/system
52 * t_hreap - integer percentage of table reap time mdb or /etc/system
53 */
54 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
55 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
56 time_t t_lreap = 50; /* default to 50% of table's reap interval */
57 time_t t_hreap = 10; /* default to 10% of table's reap interval */
58
59 id_t
60 rfs4_dbe_getid(rfs4_dbe_t *entry)
61 {
62 return (entry->dbe_id);
63 }
64
65 void
66 rfs4_dbe_hold(rfs4_dbe_t *entry)
67 {
68 atomic_inc_32(&entry->dbe_refcnt);
69 }
70
71 /*
72 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
73 */
74 void
75 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
76 {
77 atomic_dec_32(&entry->dbe_refcnt);
78 }
79
80
81 uint32_t
82 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
83 {
84 return (entry->dbe_refcnt);
85 }
86
87 /*
88 * Mark an entry such that the dbsearch will skip it.
89 * Caller does not want this entry to be found any longer
90 */
91 void
92 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
93 {
94 entry->dbe_invalid = TRUE;
95 entry->dbe_skipsearch = TRUE;
96 }
97
98 /*
99 * Is this entry invalid?
100 */
101 bool_t
102 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
103 {
104 return (entry->dbe_invalid);
105 }
106
107 time_t
108 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
109 {
110 return (entry->dbe_time_rele);
111 }
112
113 /*
114 * Use these to temporarily hide/unhide a db entry.
115 */
117 rfs4_dbe_hide(rfs4_dbe_t *entry)
118 {
119 rfs4_dbe_lock(entry);
120 entry->dbe_skipsearch = TRUE;
121 rfs4_dbe_unlock(entry);
122 }
123
124 void
125 rfs4_dbe_unhide(rfs4_dbe_t *entry)
126 {
127 rfs4_dbe_lock(entry);
128 entry->dbe_skipsearch = FALSE;
129 rfs4_dbe_unlock(entry);
130 }
131
132 void
133 rfs4_dbe_rele(rfs4_dbe_t *entry)
134 {
135 mutex_enter(entry->dbe_lock);
136 ASSERT(entry->dbe_refcnt > 1);
137 atomic_dec_32(&entry->dbe_refcnt);
138 entry->dbe_time_rele = gethrestime_sec();
139 mutex_exit(entry->dbe_lock);
140 }
141
142 void
143 rfs4_dbe_lock(rfs4_dbe_t *entry)
144 {
145 mutex_enter(entry->dbe_lock);
146 }
147
148 void
149 rfs4_dbe_unlock(rfs4_dbe_t *entry)
150 {
151 mutex_exit(entry->dbe_lock);
152 }
153
154 bool_t
155 rfs4_dbe_islocked(rfs4_dbe_t *entry)
156 {
157 return (mutex_owned(entry->dbe_lock));
805 }
806
807
808 static void
809 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
810 {
811 rfs4_index_t *idx = table->dbt_indices;
812 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
813 rfs4_link_t *l, *t;
814 rfs4_dbe_t *entry;
815 bool_t found;
816 int i;
817 int count = 0;
818
819 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
820 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
821 desired, cache_time, table->dbt_name));
822
823 /* Walk the buckets looking for entries to release/destroy */
824 for (i = 0; i < table->dbt_len; i++) {
825 bp = &buckets[i];
826 do {
827 found = FALSE;
828 rw_enter(bp->dbk_lock, RW_READER);
829 for (l = bp->dbk_head; l; l = l->next) {
830 entry = l->entry;
831 /*
832 * Examine an entry. Ref count of 1 means
833 * that the only reference is for the hash
834 * table reference.
835 */
836 if (entry->dbe_refcnt != 1)
837 continue;
838 mutex_enter(entry->dbe_lock);
839 if ((entry->dbe_refcnt == 1) &&
840 (table->dbt_reaper_shutdown ||
841 table->dbt_expiry == NULL ||
842 (*table->dbt_expiry)(entry->dbe_data))) {
843 entry->dbe_refcnt--;
844 count++;
845 found = TRUE;
846 }
847 mutex_exit(entry->dbe_lock);
848 }
849 if (found) {
850 if (!rw_tryupgrade(bp->dbk_lock)) {
851 rw_exit(bp->dbk_lock);
852 rw_enter(bp->dbk_lock, RW_WRITER);
853 }
854
855 l = bp->dbk_head;
856 while (l) {
857 t = l;
858 entry = t->entry;
859 l = l->next;
860 if (entry->dbe_refcnt == 0) {
861 DEQUEUE(bp->dbk_head, t);
862 t->next = NULL;
863 t->prev = NULL;
864 INVALIDATE_ADDR(t->entry);
865 rfs4_dbe_destroy(entry);
866 }
867 }
868 }
869 rw_exit(bp->dbk_lock);
870 /*
871 * delay slightly if there is more work to do
872 * with the expectation that other reaper
873 * threads are freeing data structures as well
874 * and in turn will reduce ref counts on
875 * entries in this table allowing them to be
876 * released. This is only done in the
877 * instance that the tables are being shut down.
878 */
879 if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
880 delay(hz/100);
881 /*
882 * If this is a table shutdown, keep going until
883 * everything is gone
884 */
885 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
886
887 if (!table->dbt_reaper_shutdown && desired && count >= desired)
888 break;
889 }
890
891 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
892 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
893 count, cache_time, table->dbt_name));
894 }
895
896 static void
897 reaper_thread(caddr_t *arg)
898 {
899 rfs4_table_t *table = (rfs4_table_t *)arg;
900 clock_t rc;
901
902 NFS4_DEBUG(table->dbt_debug,
903 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
904
905 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
|