Print this page
NEX-20260 NFS hung in transitional state when RSF marks it maintenance
NEX-20423 NFSv4 state database entry locking is not always used around reference count.
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-16452 NFS server in a zone state database needs to be per zone
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_db.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_db.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 +
21 22 /*
22 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 24 */
24 25
26 +/*
27 + * Copyright 2019 Nexenta Systems, Inc.
28 + */
29 +
25 30 #include <sys/systm.h>
26 31 #include <sys/cmn_err.h>
27 32 #include <sys/kmem.h>
28 33 #include <sys/disp.h>
29 34 #include <sys/id_space.h>
30 -#include <sys/atomic.h>
31 35 #include <rpc/rpc.h>
32 36 #include <nfs/nfs4.h>
33 37 #include <nfs/nfs4_db_impl.h>
34 38 #include <sys/sdt.h>
35 39
36 40 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
37 41
38 42 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
39 43 static void rfs4_dbe_destroy(rfs4_dbe_t *);
40 44 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
41 45 static void rfs4_start_reaper(rfs4_table_t *);
42 46
43 47 /*
44 48 * t_lowat - integer percentage of table entries /etc/system only
45 49 * t_hiwat - integer percentage of table entries /etc/system only
46 50 * t_lreap - integer percentage of table reap time mdb or /etc/system
47 51 * t_hreap - integer percentage of table reap time mdb or /etc/system
48 52 */
49 53 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
50 54 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
51 55 time_t t_lreap = 50; /* default to 50% of table's reap interval */
52 56 time_t t_hreap = 10; /* default to 10% of table's reap interval */
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
53 57
54 58 id_t
55 59 rfs4_dbe_getid(rfs4_dbe_t *entry)
56 60 {
57 61 return (entry->dbe_id);
58 62 }
59 63
60 64 void
61 65 rfs4_dbe_hold(rfs4_dbe_t *entry)
62 66 {
63 - atomic_inc_32(&entry->dbe_refcnt);
67 + if (!MUTEX_HELD(entry->dbe_lock)) {
68 + mutex_enter(entry->dbe_lock);
69 + entry->dbe_refcnt++;
70 + mutex_exit(entry->dbe_lock);
71 + } else {
72 + entry->dbe_refcnt++;
73 + }
64 74 }
65 75
66 76 /*
67 77 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
68 78 */
69 79 void
70 80 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
71 81 {
72 - atomic_dec_32(&entry->dbe_refcnt);
82 + if (!MUTEX_HELD(entry->dbe_lock)) {
83 + ASSERT(entry->dbe_refcnt > 0);
84 + mutex_enter(entry->dbe_lock);
85 + entry->dbe_refcnt--;
86 + mutex_exit(entry->dbe_lock);
87 + } else {
88 + entry->dbe_refcnt--;
89 + }
73 90 }
74 91
75 92
76 93 uint32_t
77 94 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
78 95 {
79 96 return (entry->dbe_refcnt);
80 97 }
81 98
82 99 /*
83 100 * Mark an entry such that the dbsearch will skip it.
84 101 * Caller does not want this entry to be found any longer
85 102 */
86 103 void
87 104 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
88 105 {
89 - entry->dbe_invalid = TRUE;
90 - entry->dbe_skipsearch = TRUE;
106 + if (!MUTEX_HELD(entry->dbe_lock)) {
107 + mutex_enter(entry->dbe_lock);
108 + entry->dbe_invalid = TRUE;
109 + entry->dbe_skipsearch = TRUE;
110 + mutex_exit(entry->dbe_lock);
111 + } else {
112 + entry->dbe_invalid = TRUE;
113 + entry->dbe_skipsearch = TRUE;
114 + }
91 115 }
92 116
93 117 /*
94 118 * Is this entry invalid?
95 119 */
96 120 bool_t
97 121 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
98 122 {
99 123 return (entry->dbe_invalid);
100 124 }
101 125
102 126 time_t
103 127 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
104 128 {
105 129 return (entry->dbe_time_rele);
106 130 }
107 131
108 132 /*
109 133 * Use these to temporarily hide/unhide a db entry.
110 134 */
111 135 void
112 136 rfs4_dbe_hide(rfs4_dbe_t *entry)
113 137 {
114 138 rfs4_dbe_lock(entry);
115 139 entry->dbe_skipsearch = TRUE;
116 140 rfs4_dbe_unlock(entry);
117 141 }
118 142
119 143 void
120 144 rfs4_dbe_unhide(rfs4_dbe_t *entry)
121 145 {
|
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
122 146 rfs4_dbe_lock(entry);
123 147 entry->dbe_skipsearch = FALSE;
124 148 rfs4_dbe_unlock(entry);
125 149 }
126 150
127 151 void
128 152 rfs4_dbe_rele(rfs4_dbe_t *entry)
129 153 {
130 154 mutex_enter(entry->dbe_lock);
131 155 ASSERT(entry->dbe_refcnt > 1);
132 - atomic_dec_32(&entry->dbe_refcnt);
156 + entry->dbe_refcnt--;
133 157 entry->dbe_time_rele = gethrestime_sec();
134 158 mutex_exit(entry->dbe_lock);
135 159 }
136 160
137 161 void
138 162 rfs4_dbe_lock(rfs4_dbe_t *entry)
139 163 {
140 164 mutex_enter(entry->dbe_lock);
141 165 }
142 166
143 167 void
144 168 rfs4_dbe_unlock(rfs4_dbe_t *entry)
145 169 {
146 170 mutex_exit(entry->dbe_lock);
147 171 }
148 172
149 173 bool_t
150 174 rfs4_dbe_islocked(rfs4_dbe_t *entry)
151 175 {
152 176 return (mutex_owned(entry->dbe_lock));
153 177 }
154 178
155 179 clock_t
156 180 rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
157 181 {
158 182 return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
159 183 }
160 184
161 185 void
162 186 rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
163 187 {
164 188 cv_broadcast(entry->dbe_cv);
165 189 }
166 190
167 191 /* ARGSUSED */
168 192 static int
169 193 rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag)
170 194 {
171 195 rfs4_dbe_t *entry = obj;
172 196
173 197 mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
174 198 cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
175 199
176 200 return (0);
177 201 }
178 202
179 203 static void
180 204 rfs4_dbe_kmem_destructor(void *obj, void *private)
181 205 {
182 206 rfs4_dbe_t *entry = obj;
183 207 /*LINTED*/
184 208 rfs4_table_t *table = private;
185 209
186 210 mutex_destroy(entry->dbe_lock);
187 211 cv_destroy(entry->dbe_cv);
188 212 }
189 213
190 214 rfs4_database_t *
191 215 rfs4_database_create(uint32_t flags)
192 216 {
193 217 rfs4_database_t *db;
194 218
195 219 db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
196 220 mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
197 221 db->db_tables = NULL;
198 222 db->db_debug_flags = flags;
199 223 db->db_shutdown_count = 0;
200 224 cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
201 225 return (db);
202 226 }
203 227
204 228
205 229 /*
206 230 * The reaper threads that have been created for the tables in this
207 231 * database must be stopped and the entries in the tables released.
208 232 * Each table will be marked as "shutdown" and the reaper threads
209 233 * poked and they will see that a shutdown is in progress and cleanup
210 234 * and exit. This function waits for all reaper threads to stop
211 235 * before returning to the caller.
212 236 */
213 237 void
214 238 rfs4_database_shutdown(rfs4_database_t *db)
215 239 {
216 240 rfs4_table_t *table;
217 241
218 242 mutex_enter(db->db_lock);
219 243 for (table = db->db_tables; table; table = table->dbt_tnext) {
220 244 mutex_enter(&table->dbt_reaper_cv_lock);
221 245 table->dbt_reaper_shutdown = TRUE;
222 246 cv_broadcast(&table->dbt_reaper_wait);
223 247 db->db_shutdown_count++;
224 248 mutex_exit(&table->dbt_reaper_cv_lock);
225 249 }
226 250 while (db->db_shutdown_count > 0) {
227 251 cv_wait(&db->db_shutdown_wait, db->db_lock);
228 252 }
229 253 mutex_exit(db->db_lock);
230 254 }
231 255
232 256 /*
233 257 * Given a database that has been "shutdown" by the function above all
234 258 * of the table tables are destroyed and then the database itself
235 259 * freed.
236 260 */
237 261 void
238 262 rfs4_database_destroy(rfs4_database_t *db)
239 263 {
240 264 rfs4_table_t *next, *tmp;
241 265
|
↓ open down ↓ |
99 lines elided |
↑ open up ↑ |
242 266 for (next = db->db_tables; next; ) {
243 267 tmp = next;
244 268 next = tmp->dbt_tnext;
245 269 rfs4_table_destroy(db, tmp);
246 270 }
247 271
248 272 mutex_destroy(db->db_lock);
249 273 kmem_free(db, sizeof (rfs4_database_t));
250 274 }
251 275
276 +/*
277 + * Used to get the correct kmem_cache database for the state table being
278 + * created.
279 + * Helper function for rfs4_table_create
280 + */
281 +static kmem_cache_t *
282 +get_db_mem_cache(char *name)
283 +{
284 + int i;
285 +
286 + for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
287 + if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
288 + return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
289 + }
290 + /*
291 + * There is no associated kmem cache for this NFS4 server state
292 + * table name
293 + */
294 + return (NULL);
295 +}
296 +
297 +/*
298 + * Used to initialize the global NFSv4 server state database.
299 + * Helper function for rfs4_state_g_init and called when module is loaded.
300 + */
301 +kmem_cache_t *
302 +/* CSTYLED */
303 +nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
304 +{
305 + kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
306 + sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
307 + 0,
308 + rfs4_dbe_kmem_constructor,
309 + rfs4_dbe_kmem_destructor,
310 + NULL,
311 + NULL,
312 + NULL,
313 + 0);
314 + (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
315 + strlen(cache_name) + 1);
316 + rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
317 + return (mem_cache);
318 +}
319 +
252 320 rfs4_table_t *
253 321 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
254 322 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
255 323 void (*destroy)(rfs4_entry_t),
256 324 bool_t (*expiry)(rfs4_entry_t),
257 325 uint32_t size, uint32_t hashsize,
258 326 uint32_t maxentries, id_t start)
259 327 {
260 328 rfs4_table_t *table;
261 329 int len;
262 330 char *cache_name;
263 331 char *id_name;
264 332
265 333 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
266 334 table->dbt_db = db;
267 335 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
268 336 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
269 337 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
270 338 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
271 339
272 340 len = strlen(tabname);
273 341 table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
274 342 cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
275 343 (void) strcpy(table->dbt_name, tabname);
276 344 (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
277 345 table->dbt_max_cache_time = max_cache_time;
278 346 table->dbt_usize = size;
279 347 table->dbt_len = hashsize;
280 348 table->dbt_count = 0;
281 349 table->dbt_idxcnt = 0;
282 350 table->dbt_ccnt = 0;
283 351 table->dbt_maxcnt = idxcnt;
284 352 table->dbt_indices = NULL;
285 353 table->dbt_id_space = NULL;
286 354 table->dbt_reaper_shutdown = FALSE;
287 355
288 356 if (start >= 0) {
289 357 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
290 358 maxentries = INT32_MAX - start;
291 359 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
292 360 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
293 361 table->dbt_id_space = id_space_create(id_name, start,
294 362 maxentries + start);
295 363 kmem_free(id_name, len + 10);
296 364 }
|
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
297 365 ASSERT(t_lowat != 0);
298 366 table->dbt_id_lwat = (maxentries * t_lowat) / 100;
299 367 ASSERT(t_hiwat != 0);
300 368 table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
301 369 table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
302 370 table->dbt_maxentries = maxentries;
303 371 table->dbt_create = create;
304 372 table->dbt_destroy = destroy;
305 373 table->dbt_expiry = expiry;
306 374
307 - table->dbt_mem_cache = kmem_cache_create(cache_name,
308 - sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
309 - 0,
310 - rfs4_dbe_kmem_constructor,
311 - rfs4_dbe_kmem_destructor,
312 - NULL,
313 - table,
314 - NULL,
315 - 0);
375 + /*
376 + * get the correct kmem_cache for this table type based on the name.
377 + */
378 + table->dbt_mem_cache = get_db_mem_cache(cache_name);
379 +
316 380 kmem_free(cache_name, len+13);
317 381
318 382 table->dbt_debug = db->db_debug_flags;
319 383
320 384 mutex_enter(db->db_lock);
321 385 table->dbt_tnext = db->db_tables;
322 386 db->db_tables = table;
323 387 mutex_exit(db->db_lock);
324 388
325 389 rfs4_start_reaper(table);
326 390
327 391 return (table);
328 392 }
329 393
330 394 void
331 395 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
332 396 {
333 397 rfs4_table_t *p;
334 398 rfs4_index_t *idx;
335 399
336 400 ASSERT(table->dbt_count == 0);
337 401
338 402 mutex_enter(db->db_lock);
339 403 if (table == db->db_tables)
340 404 db->db_tables = table->dbt_tnext;
341 405 else {
342 406 for (p = db->db_tables; p; p = p->dbt_tnext)
343 407 if (p->dbt_tnext == table) {
344 408 p->dbt_tnext = table->dbt_tnext;
345 409 table->dbt_tnext = NULL;
346 410 break;
347 411 }
348 412 ASSERT(p != NULL);
349 413 }
350 414 mutex_exit(db->db_lock);
351 415
352 416 /* Destroy indices */
353 417 while (table->dbt_indices) {
354 418 idx = table->dbt_indices;
355 419 table->dbt_indices = idx->dbi_inext;
356 420 rfs4_index_destroy(idx);
|
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
357 421 }
358 422
359 423 rw_destroy(table->dbt_t_lock);
360 424 mutex_destroy(table->dbt_lock);
361 425 mutex_destroy(&table->dbt_reaper_cv_lock);
362 426 cv_destroy(&table->dbt_reaper_wait);
363 427
364 428 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
365 429 if (table->dbt_id_space)
366 430 id_space_destroy(table->dbt_id_space);
367 - kmem_cache_destroy(table->dbt_mem_cache);
431 + table->dbt_mem_cache = NULL;
368 432 kmem_free(table, sizeof (rfs4_table_t));
369 433 }
370 434
371 435 rfs4_index_t *
372 436 rfs4_index_create(rfs4_table_t *table, char *keyname,
373 437 uint32_t (*hash)(void *),
374 438 bool_t (compare)(rfs4_entry_t, void *),
375 439 void *(*mkkey)(rfs4_entry_t),
376 440 bool_t createable)
377 441 {
378 442 rfs4_index_t *idx;
379 443
380 444 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
381 445
382 446 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
383 447
384 448 idx->dbi_table = table;
385 449 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
386 450 (void) strcpy(idx->dbi_keyname, keyname);
387 451 idx->dbi_hash = hash;
388 452 idx->dbi_compare = compare;
389 453 idx->dbi_mkkey = mkkey;
390 454 idx->dbi_tblidx = table->dbt_idxcnt;
391 455 table->dbt_idxcnt++;
392 456 if (createable) {
393 457 table->dbt_ccnt++;
394 458 if (table->dbt_ccnt > 1)
395 459 panic("Table %s currently can have only have one "
396 460 "index that will allow creation of entries",
397 461 table->dbt_name);
398 462 idx->dbi_createable = TRUE;
399 463 } else {
400 464 idx->dbi_createable = FALSE;
401 465 }
402 466
403 467 idx->dbi_inext = table->dbt_indices;
404 468 table->dbt_indices = idx;
405 469 idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
406 470 KM_SLEEP);
407 471
408 472 return (idx);
409 473 }
410 474
411 475 void
412 476 rfs4_index_destroy(rfs4_index_t *idx)
413 477 {
414 478 kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
415 479 kmem_free(idx->dbi_buckets,
416 480 sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
417 481 kmem_free(idx, sizeof (rfs4_index_t));
418 482 }
419 483
420 484 static void
421 485 rfs4_dbe_destroy(rfs4_dbe_t *entry)
422 486 {
423 487 rfs4_index_t *idx;
424 488 void *key;
425 489 int i;
426 490 rfs4_bucket_t *bp;
427 491 rfs4_table_t *table = entry->dbe_table;
428 492 rfs4_link_t *l;
429 493
430 494 NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
431 495 (CE_NOTE, "Destroying entry %p from %s",
432 496 (void*)entry, table->dbt_name));
433 497
434 498 mutex_enter(entry->dbe_lock);
435 499 ASSERT(entry->dbe_refcnt == 0);
436 500 mutex_exit(entry->dbe_lock);
437 501
438 502 /* Unlink from all indices */
439 503 for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
440 504 l = &entry->dbe_indices[idx->dbi_tblidx];
441 505 /* check and see if we were ever linked in to the index */
442 506 if (INVALID_LINK(l)) {
443 507 ASSERT(l->next == NULL && l->prev == NULL);
444 508 continue;
445 509 }
446 510 key = idx->dbi_mkkey(entry->dbe_data);
447 511 i = HASH(idx, key);
448 512 bp = &idx->dbi_buckets[i];
449 513 ASSERT(bp->dbk_head != NULL);
450 514 DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
451 515 }
452 516
453 517 /* Destroy user data */
454 518 if (table->dbt_destroy)
455 519 (*table->dbt_destroy)(entry->dbe_data);
456 520
457 521 if (table->dbt_id_space)
458 522 id_free(table->dbt_id_space, entry->dbe_id);
459 523
460 524 mutex_enter(table->dbt_lock);
461 525 table->dbt_count--;
462 526 mutex_exit(table->dbt_lock);
463 527
464 528 /* Destroy the entry itself */
465 529 kmem_cache_free(table->dbt_mem_cache, entry);
466 530 }
467 531
468 532
469 533 static rfs4_dbe_t *
470 534 rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
471 535 {
472 536 rfs4_dbe_t *entry;
473 537 int i;
474 538
475 539 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
476 540 (CE_NOTE, "Creating entry in table %s", table->dbt_name));
477 541
478 542 entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);
479 543
480 544 entry->dbe_refcnt = 1;
481 545 entry->dbe_invalid = FALSE;
482 546 entry->dbe_skipsearch = FALSE;
483 547 entry->dbe_time_rele = 0;
484 548 entry->dbe_id = 0;
485 549
486 550 if (table->dbt_id_space)
487 551 entry->dbe_id = id;
488 552 entry->dbe_table = table;
489 553
490 554 for (i = 0; i < table->dbt_maxcnt; i++) {
491 555 entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
492 556 entry->dbe_indices[i].entry = entry;
493 557 /*
494 558 * We mark the entry as not indexed by setting the low
495 559 * order bit, since addresses are word aligned. This has
496 560 * the advantage of causing a trap if the address is
497 561 * used. After the entry is linked in to the
498 562 * corresponding index the bit will be cleared.
499 563 */
500 564 INVALIDATE_ADDR(entry->dbe_indices[i].entry);
501 565 }
502 566
503 567 entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
504 568 bzero(entry->dbe_data, table->dbt_usize);
505 569 entry->dbe_data->dbe = entry;
506 570
507 571 if (!(*table->dbt_create)(entry->dbe_data, data)) {
508 572 kmem_cache_free(table->dbt_mem_cache, entry);
509 573 return (NULL);
510 574 }
511 575
512 576 mutex_enter(table->dbt_lock);
513 577 table->dbt_count++;
514 578 mutex_exit(table->dbt_lock);
515 579
516 580 return (entry);
517 581 }
518 582
519 583 static void
520 584 rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
521 585 {
522 586 clock_t tabreap;
523 587 clock_t reap_int;
524 588 uint32_t in_use;
525 589
526 590 /*
527 591 * Adjust the table's reap interval based on the
528 592 * number of id's currently in use. Each table's
529 593 * default remains the same if id usage subsides.
530 594 */
531 595 ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
532 596 tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
533 597
534 598 in_use = table->dbt_count + 1; /* see rfs4_dbe_create */
535 599 if (in_use >= table->dbt_id_hwat) {
536 600 ASSERT(t_hreap != 0);
537 601 reap_int = (tabreap * t_hreap) / 100;
538 602 } else if (in_use >= table->dbt_id_lwat) {
539 603 ASSERT(t_lreap != 0);
540 604 reap_int = (tabreap * t_lreap) / 100;
541 605 } else {
542 606 reap_int = tabreap;
543 607 }
544 608 table->dbt_id_reap = reap_int;
545 609 DTRACE_PROBE2(table__reap__interval, char *,
546 610 table->dbt_name, time_t, table->dbt_id_reap);
547 611 }
548 612
549 613 rfs4_entry_t
550 614 rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
551 615 rfs4_dbsearch_type_t dbsearch_type)
552 616 {
553 617 int already_done;
554 618 uint32_t i;
555 619 rfs4_table_t *table = idx->dbi_table;
556 620 rfs4_index_t *ip;
557 621 rfs4_bucket_t *bp;
558 622 rfs4_link_t *l;
559 623 rfs4_dbe_t *entry;
560 624 id_t id = -1;
561 625
562 626 i = HASH(idx, key);
563 627 bp = &idx->dbi_buckets[i];
564 628
565 629 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
566 630 (CE_NOTE, "Searching for key %p in table %s by %s",
567 631 key, table->dbt_name, idx->dbi_keyname));
568 632
569 633 rw_enter(bp->dbk_lock, RW_READER);
570 634 retry:
571 635 for (l = bp->dbk_head; l; l = l->next) {
572 636 if (l->entry->dbe_refcnt > 0 &&
573 637 (l->entry->dbe_skipsearch == FALSE ||
574 638 (l->entry->dbe_skipsearch == TRUE &&
575 639 dbsearch_type == RFS4_DBS_INVALID)) &&
576 640 (*idx->dbi_compare)(l->entry->dbe_data, key)) {
577 641 mutex_enter(l->entry->dbe_lock);
578 642 if (l->entry->dbe_refcnt == 0) {
579 643 mutex_exit(l->entry->dbe_lock);
580 644 continue;
581 645 }
582 646
583 647 /* place an additional hold since we are returning */
584 648 rfs4_dbe_hold(l->entry);
585 649
586 650 mutex_exit(l->entry->dbe_lock);
587 651 rw_exit(bp->dbk_lock);
588 652
589 653 *create = FALSE;
590 654
591 655 NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
592 656 (CE_NOTE, "Found entry %p for %p in table %s",
593 657 (void *)l->entry, key, table->dbt_name));
594 658
595 659 if (id != -1)
596 660 id_free(table->dbt_id_space, id);
597 661 return (l->entry->dbe_data);
598 662 }
599 663 }
600 664
601 665 if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
602 666 table->dbt_maxentries == table->dbt_count) {
603 667 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
604 668 (CE_NOTE, "Entry for %p in %s not found",
605 669 key, table->dbt_name));
606 670
607 671 rw_exit(bp->dbk_lock);
608 672 if (id != -1)
609 673 id_free(table->dbt_id_space, id);
610 674 return (NULL);
611 675 }
612 676
613 677 if (table->dbt_id_space && id == -1) {
614 678 rw_exit(bp->dbk_lock);
615 679
616 680 /* get an id, ok to sleep for it here */
617 681 id = id_alloc(table->dbt_id_space);
618 682 ASSERT(id != -1);
619 683
620 684 mutex_enter(&table->dbt_reaper_cv_lock);
621 685 rfs4_dbe_tabreap_adjust(table);
622 686 mutex_exit(&table->dbt_reaper_cv_lock);
623 687
624 688 rw_enter(bp->dbk_lock, RW_WRITER);
625 689 goto retry;
626 690 }
627 691
628 692 /* get an exclusive lock on the bucket */
629 693 if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
630 694 NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
631 695 (CE_NOTE, "Trying to upgrade lock on "
632 696 "hash chain %d (%p) for %s by %s",
633 697 i, (void*)bp, table->dbt_name, idx->dbi_keyname));
634 698
635 699 rw_exit(bp->dbk_lock);
636 700 rw_enter(bp->dbk_lock, RW_WRITER);
637 701 goto retry;
638 702 }
639 703
640 704 /* create entry */
641 705 entry = rfs4_dbe_create(table, id, arg);
642 706 if (entry == NULL) {
643 707 rw_exit(bp->dbk_lock);
644 708 if (id != -1)
645 709 id_free(table->dbt_id_space, id);
646 710
647 711 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
648 712 (CE_NOTE, "Constructor for table %s failed",
649 713 table->dbt_name));
650 714 return (NULL);
651 715 }
652 716
653 717 /*
654 718 * Add one ref for entry into table's hash - only one
655 719 * reference added even though there may be multiple indices
656 720 */
657 721 rfs4_dbe_hold(entry);
658 722 ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
659 723 VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);
660 724
661 725 already_done = idx->dbi_tblidx;
662 726 rw_exit(bp->dbk_lock);
663 727
664 728 for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
665 729 if (ip->dbi_tblidx == already_done)
666 730 continue;
667 731 l = &entry->dbe_indices[ip->dbi_tblidx];
668 732 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
669 733 ASSERT(i < ip->dbi_table->dbt_len);
670 734 bp = &ip->dbi_buckets[i];
671 735 ENQUEUE_IDX(bp, l);
672 736 }
673 737
674 738 NFS4_DEBUG(
675 739 table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
|
↓ open down ↓ |
298 lines elided |
↑ open up ↑ |
676 740 (CE_NOTE, "Entry %p created for %s = %p in table %s",
677 741 (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
678 742
679 743 return (entry->dbe_data);
680 744 }
681 745
682 746 /*ARGSUSED*/
683 747 boolean_t
684 748 rfs4_cpr_callb(void *arg, int code)
685 749 {
686 - rfs4_table_t *table = rfs4_client_tab;
687 750 rfs4_bucket_t *buckets, *bp;
688 751 rfs4_link_t *l;
689 752 rfs4_client_t *cp;
690 753 int i;
691 754
755 + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
756 + rfs4_table_t *table = nsrv4->rfs4_client_tab;
757 +
692 758 /*
693 759 * We get called for Suspend and Resume events.
694 760 * For the suspend case we simply don't care! Nor do we care if
695 761 * there are no clients.
696 762 */
697 763 if (code == CB_CODE_CPR_CHKPT || table == NULL) {
698 764 return (B_TRUE);
699 765 }
700 766
701 767 buckets = table->dbt_indices->dbi_buckets;
702 768
703 769 /*
704 770 * When we get this far we are in the process of
705 771 * resuming the system from a previous suspend.
706 772 *
707 773 * We are going to blast through and update the
708 774 * last_access time for all the clients and in
709 775 * doing so extend them by one lease period.
710 776 */
711 777 for (i = 0; i < table->dbt_len; i++) {
712 778 bp = &buckets[i];
713 779 for (l = bp->dbk_head; l; l = l->next) {
714 780 cp = (rfs4_client_t *)l->entry->dbe_data;
715 781 cp->rc_last_access = gethrestime_sec();
716 782 }
717 783 }
718 784
719 785 return (B_TRUE);
720 786 }
721 787
722 788 /*
723 789 * Given a table, lock each of the buckets and walk all entries (in
724 790 * turn locking those) and calling the provided "callout" function
725 791 * with the provided parameter. Obviously used to iterate across all
726 792 * entries in a particular table via the database locking hierarchy.
727 793 * Obviously the caller must not hold locks on any of the entries in
728 794 * the specified table.
729 795 */
730 796 void
731 797 rfs4_dbe_walk(rfs4_table_t *table,
732 798 void (*callout)(rfs4_entry_t, void *),
733 799 void *data)
734 800 {
735 801 rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
736 802 rfs4_link_t *l;
737 803 rfs4_dbe_t *entry;
738 804 int i;
739 805
740 806 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
741 807 (CE_NOTE, "Walking entries in %s", table->dbt_name));
742 808
743 809 /* Walk the buckets looking for entries to release/destroy */
744 810 for (i = 0; i < table->dbt_len; i++) {
745 811 bp = &buckets[i];
746 812 rw_enter(bp->dbk_lock, RW_READER);
747 813 for (l = bp->dbk_head; l; l = l->next) {
748 814 entry = l->entry;
749 815 mutex_enter(entry->dbe_lock);
750 816 (*callout)(entry->dbe_data, data);
751 817 mutex_exit(entry->dbe_lock);
752 818 }
753 819 rw_exit(bp->dbk_lock);
754 820 }
755 821
756 822 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
757 823 (CE_NOTE, "Walking entries complete %s", table->dbt_name));
758 824 }
759 825
760 826
761 827 static void
762 828 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
763 829 {
764 830 rfs4_index_t *idx = table->dbt_indices;
765 831 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
766 832 rfs4_link_t *l, *t;
767 833 rfs4_dbe_t *entry;
|
↓ open down ↓ |
66 lines elided |
↑ open up ↑ |
768 834 bool_t found;
769 835 int i;
770 836 int count = 0;
771 837
772 838 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
773 839 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
774 840 desired, cache_time, table->dbt_name));
775 841
776 842 /* Walk the buckets looking for entries to release/destroy */
777 843 for (i = 0; i < table->dbt_len; i++) {
844 + int retries = 0;
778 845 bp = &buckets[i];
779 846 do {
780 847 found = FALSE;
781 848 rw_enter(bp->dbk_lock, RW_READER);
782 849 for (l = bp->dbk_head; l; l = l->next) {
783 850 entry = l->entry;
851 + mutex_enter(entry->dbe_lock);
852 + ASSERT(entry->dbe_refcnt != 0);
784 853 /*
785 854 * Examine an entry. Ref count of 1 means
786 855 * that the only reference is for the hash
787 856 * table reference.
788 857 */
789 - if (entry->dbe_refcnt != 1)
858 + if (entry->dbe_refcnt != 1) {
859 +#ifdef DEBUG
860 + rfs4_dbe_debug(entry);
861 +#endif
862 + mutex_exit(entry->dbe_lock);
790 863 continue;
791 - mutex_enter(entry->dbe_lock);
864 + }
792 865 if ((entry->dbe_refcnt == 1) &&
793 866 (table->dbt_reaper_shutdown ||
794 867 table->dbt_expiry == NULL ||
795 868 (*table->dbt_expiry)(entry->dbe_data))) {
796 - entry->dbe_refcnt--;
869 + rfs4_dbe_rele_nolock(entry);
797 870 count++;
798 871 found = TRUE;
799 872 }
800 873 mutex_exit(entry->dbe_lock);
801 874 }
802 875 if (found) {
803 876 if (!rw_tryupgrade(bp->dbk_lock)) {
804 877 rw_exit(bp->dbk_lock);
805 878 rw_enter(bp->dbk_lock, RW_WRITER);
806 879 }
807 880
808 881 l = bp->dbk_head;
809 882 while (l) {
810 883 t = l;
811 884 entry = t->entry;
812 885 l = l->next;
886 + mutex_enter(entry->dbe_lock);
813 887 if (entry->dbe_refcnt == 0) {
814 888 DEQUEUE(bp->dbk_head, t);
889 + mutex_exit(entry->dbe_lock);
815 890 t->next = NULL;
816 891 t->prev = NULL;
817 892 INVALIDATE_ADDR(t->entry);
818 893 rfs4_dbe_destroy(entry);
819 - }
894 + } else
895 + mutex_exit(entry->dbe_lock);
820 896 }
821 897 }
822 898 rw_exit(bp->dbk_lock);
823 899 /*
824 900 * delay slightly if there is more work to do
825 901 * with the expectation that other reaper
826 902 * threads are freeing data structures as well
827 903 * and in turn will reduce ref counts on
828 904 * entries in this table allowing them to be
829 905 * released. This is only done in the
830 906 * instance that the tables are being shut down.
831 907 */
832 - if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
908 + if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
833 909 delay(hz/100);
910 + retries++;
911 + }
834 912 /*
835 913 * If this is a table shutdown, keep going until
836 914 * everything is gone or the retry limit is reached
837 915 */
838 - } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
916 + } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
839 917
840 918 if (!table->dbt_reaper_shutdown && desired && count >= desired)
841 919 break;
842 920 }
843 921
844 922 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
845 923 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
846 924 count, cache_time, table->dbt_name));
847 925 }
848 926
849 927 static void
850 928 reaper_thread(caddr_t *arg)
851 929 {
852 930 rfs4_table_t *table = (rfs4_table_t *)arg;
853 931 clock_t rc;
854 932
855 933 NFS4_DEBUG(table->dbt_debug,
856 934 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
857 935
858 936 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
859 937 callb_generic_cpr, "nfsv4Reaper");
860 938
861 939 mutex_enter(&table->dbt_reaper_cv_lock);
862 940 do {
863 941 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
864 942 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
865 943 &table->dbt_reaper_cv_lock,
866 944 SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
867 945 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
868 946 &table->dbt_reaper_cv_lock);
869 947 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
870 948 } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
871 949
|
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
872 950 CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
873 951
874 952 NFS4_DEBUG(table->dbt_debug,
875 953 (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
876 954
877 955 /* Notify the database shutdown processing that the table is shutdown */
878 956 mutex_enter(table->dbt_db->db_lock);
879 957 table->dbt_db->db_shutdown_count--;
880 958 cv_signal(&table->dbt_db->db_shutdown_wait);
881 959 mutex_exit(table->dbt_db->db_lock);
960 + zthread_exit();
882 961 }
883 962
884 963 static void
885 964 rfs4_start_reaper(rfs4_table_t *table)
886 965 {
887 966 if (table->dbt_max_cache_time == 0)
888 967 return;
889 968
890 - (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
969 + (void) zthread_create(NULL, 0, reaper_thread, table, 0,
891 970 minclsyspri);
892 971 }
893 972
894 973 #ifdef DEBUG
895 974 void
896 975 rfs4_dbe_debug(rfs4_dbe_t *entry)
897 976 {
898 977 cmn_err(CE_NOTE, "Entry %p from table %s",
899 978 (void *)entry, entry->dbe_table->dbt_name);
900 979 cmn_err(CE_CONT, "\trefcnt = %d id = %d",
901 980 entry->dbe_refcnt, entry->dbe_id);
902 981 }
903 982 #endif
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX