Print this page
Revert "NEX-20260 NFS hung in transitional state when RSF marks it maintenance"
This reverts commit 9bf6e5f740709f470ba350df64cd9f2c93f3f0a7.
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_db.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_db.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
|
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 - * Copyright 2019 Nexenta Systems, Inc.
27 + * Copyright 2018 Nexenta Systems, Inc.
28 28 */
29 29
30 30 #include <sys/systm.h>
31 31 #include <sys/cmn_err.h>
32 32 #include <sys/kmem.h>
33 33 #include <sys/disp.h>
34 34 #include <sys/id_space.h>
35 +#include <sys/atomic.h>
35 36 #include <rpc/rpc.h>
36 37 #include <nfs/nfs4.h>
37 38 #include <nfs/nfs4_db_impl.h>
38 39 #include <sys/sdt.h>
39 40
40 41 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
41 42
42 43 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
43 44 static void rfs4_dbe_destroy(rfs4_dbe_t *);
44 45 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
45 46 static void rfs4_start_reaper(rfs4_table_t *);
46 47
47 48 /*
48 49 * t_lowat - integer percentage of table entries /etc/system only
49 50 * t_hiwat - integer percentage of table entries /etc/system only
50 51 * t_lreap - integer percentage of table reap time mdb or /etc/system
51 52 * t_hreap - integer percentage of table reap time mdb or /etc/system
52 53 */
53 54 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
54 55 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
55 56 time_t t_lreap = 50; /* default to 50% of table's reap interval */
56 57 time_t t_hreap = 10; /* default to 10% of table's reap interval */
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
57 58
58 59 id_t
59 60 rfs4_dbe_getid(rfs4_dbe_t *entry)
60 61 {
61 62 return (entry->dbe_id);
62 63 }
63 64
64 65 void
65 66 rfs4_dbe_hold(rfs4_dbe_t *entry)
66 67 {
67 - if (!MUTEX_HELD(entry->dbe_lock)) {
68 - mutex_enter(entry->dbe_lock);
69 - entry->dbe_refcnt++;
70 - mutex_exit(entry->dbe_lock);
71 - } else {
72 - entry->dbe_refcnt++;
73 - }
68 + atomic_inc_32(&entry->dbe_refcnt);
74 69 }
75 70
76 71 /*
77 72 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
78 73 */
79 74 void
80 75 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
81 76 {
82 - if (!MUTEX_HELD(entry->dbe_lock)) {
83 - ASSERT(entry->dbe_refcnt > 0);
84 - mutex_enter(entry->dbe_lock);
85 - entry->dbe_refcnt--;
86 - mutex_exit(entry->dbe_lock);
87 - } else {
88 - entry->dbe_refcnt--;
89 - }
77 + atomic_dec_32(&entry->dbe_refcnt);
90 78 }
91 79
92 80
93 81 uint32_t
94 82 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
95 83 {
96 84 return (entry->dbe_refcnt);
97 85 }
98 86
99 87 /*
100 88 * Mark an entry such that the dbsearch will skip it.
101 89 * Caller does not want this entry to be found any longer
102 90 */
103 91 void
104 92 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
105 93 {
106 - if (!MUTEX_HELD(entry->dbe_lock)) {
107 - mutex_enter(entry->dbe_lock);
108 - entry->dbe_invalid = TRUE;
109 - entry->dbe_skipsearch = TRUE;
110 - mutex_exit(entry->dbe_lock);
111 - } else {
112 - entry->dbe_invalid = TRUE;
113 - entry->dbe_skipsearch = TRUE;
114 - }
94 + entry->dbe_invalid = TRUE;
95 + entry->dbe_skipsearch = TRUE;
115 96 }
116 97
117 98 /*
118 99 * Is this entry invalid?
119 100 */
120 101 bool_t
121 102 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
122 103 {
123 104 return (entry->dbe_invalid);
124 105 }
125 106
126 107 time_t
127 108 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
128 109 {
129 110 return (entry->dbe_time_rele);
130 111 }
131 112
132 113 /*
133 114 * Use these to temporarily hide/unhide a db entry.
134 115 */
135 116 void
136 117 rfs4_dbe_hide(rfs4_dbe_t *entry)
137 118 {
138 119 rfs4_dbe_lock(entry);
139 120 entry->dbe_skipsearch = TRUE;
140 121 rfs4_dbe_unlock(entry);
141 122 }
142 123
143 124 void
144 125 rfs4_dbe_unhide(rfs4_dbe_t *entry)
145 126 {
|
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
146 127 rfs4_dbe_lock(entry);
147 128 entry->dbe_skipsearch = FALSE;
148 129 rfs4_dbe_unlock(entry);
149 130 }
150 131
151 132 void
152 133 rfs4_dbe_rele(rfs4_dbe_t *entry)
153 134 {
154 135 mutex_enter(entry->dbe_lock);
155 136 ASSERT(entry->dbe_refcnt > 1);
156 - entry->dbe_refcnt--;
137 + atomic_dec_32(&entry->dbe_refcnt);
157 138 entry->dbe_time_rele = gethrestime_sec();
158 139 mutex_exit(entry->dbe_lock);
159 140 }
160 141
161 142 void
162 143 rfs4_dbe_lock(rfs4_dbe_t *entry)
163 144 {
164 145 mutex_enter(entry->dbe_lock);
165 146 }
166 147
167 148 void
168 149 rfs4_dbe_unlock(rfs4_dbe_t *entry)
169 150 {
170 151 mutex_exit(entry->dbe_lock);
171 152 }
172 153
173 154 bool_t
174 155 rfs4_dbe_islocked(rfs4_dbe_t *entry)
175 156 {
176 157 return (mutex_owned(entry->dbe_lock));
177 158 }
178 159
179 160 clock_t
180 161 rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
181 162 {
182 163 return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
183 164 }
184 165
185 166 void
186 167 rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
187 168 {
188 169 cv_broadcast(entry->dbe_cv);
189 170 }
190 171
191 172 /* ARGSUSED */
192 173 static int
193 174 rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag)
194 175 {
195 176 rfs4_dbe_t *entry = obj;
196 177
197 178 mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
198 179 cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
199 180
200 181 return (0);
201 182 }
202 183
203 184 static void
204 185 rfs4_dbe_kmem_destructor(void *obj, void *private)
205 186 {
206 187 rfs4_dbe_t *entry = obj;
207 188 /*LINTED*/
208 189 rfs4_table_t *table = private;
209 190
210 191 mutex_destroy(entry->dbe_lock);
211 192 cv_destroy(entry->dbe_cv);
212 193 }
213 194
214 195 rfs4_database_t *
215 196 rfs4_database_create(uint32_t flags)
216 197 {
217 198 rfs4_database_t *db;
218 199
219 200 db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
220 201 mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
221 202 db->db_tables = NULL;
222 203 db->db_debug_flags = flags;
223 204 db->db_shutdown_count = 0;
224 205 cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
225 206 return (db);
226 207 }
227 208
228 209
229 210 /*
230 211 * The reaper threads that have been created for the tables in this
231 212 * database must be stopped and the entries in the tables released.
232 213 * Each table will be marked as "shutdown" and the reaper threads
233 214 * poked and they will see that a shutdown is in progress and cleanup
234 215 * and exit. This function waits for all reaper threads to stop
235 216 * before returning to the caller.
236 217 */
237 218 void
238 219 rfs4_database_shutdown(rfs4_database_t *db)
239 220 {
240 221 rfs4_table_t *table;
241 222
242 223 mutex_enter(db->db_lock);
243 224 for (table = db->db_tables; table; table = table->dbt_tnext) {
244 225 mutex_enter(&table->dbt_reaper_cv_lock);
245 226 table->dbt_reaper_shutdown = TRUE;
246 227 cv_broadcast(&table->dbt_reaper_wait);
247 228 db->db_shutdown_count++;
248 229 mutex_exit(&table->dbt_reaper_cv_lock);
249 230 }
250 231 while (db->db_shutdown_count > 0) {
251 232 cv_wait(&db->db_shutdown_wait, db->db_lock);
252 233 }
253 234 mutex_exit(db->db_lock);
254 235 }
255 236
256 237 /*
257 238 * Given a database that has been "shutdown" by the function above, all
258 239 * of its tables are destroyed and then the database itself is
259 240 * freed.
260 241 */
261 242 void
262 243 rfs4_database_destroy(rfs4_database_t *db)
263 244 {
264 245 rfs4_table_t *next, *tmp;
265 246
266 247 for (next = db->db_tables; next; ) {
267 248 tmp = next;
268 249 next = tmp->dbt_tnext;
269 250 rfs4_table_destroy(db, tmp);
270 251 }
271 252
272 253 mutex_destroy(db->db_lock);
273 254 kmem_free(db, sizeof (rfs4_database_t));
274 255 }
275 256
276 257 /*
277 258 * Used to get the correct kmem_cache database for the state table being
278 259 * created.
279 260 * Helper function for rfs4_table_create
280 261 */
281 262 static kmem_cache_t *
282 263 get_db_mem_cache(char *name)
283 264 {
284 265 int i;
285 266
286 267 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
287 268 if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
288 269 return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
289 270 }
290 271 /*
291 272 * There is no associated kmem cache for this NFS4 server state
292 273 * table name
293 274 */
294 275 return (NULL);
295 276 }
296 277
297 278 /*
298 279 * Used to initialize the global NFSv4 server state database.
299 280 * Helper function for rfs4_state_g_init and called when module is loaded.
300 281 */
301 282 kmem_cache_t *
302 283 /* CSTYLED */
303 284 nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
304 285 {
305 286 kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
306 287 sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
307 288 0,
308 289 rfs4_dbe_kmem_constructor,
309 290 rfs4_dbe_kmem_destructor,
310 291 NULL,
311 292 NULL,
312 293 NULL,
313 294 0);
314 295 (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
315 296 strlen(cache_name) + 1);
316 297 rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
317 298 return (mem_cache);
318 299 }
319 300
320 301 rfs4_table_t *
321 302 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
322 303 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
323 304 void (*destroy)(rfs4_entry_t),
324 305 bool_t (*expiry)(rfs4_entry_t),
325 306 uint32_t size, uint32_t hashsize,
326 307 uint32_t maxentries, id_t start)
327 308 {
328 309 rfs4_table_t *table;
329 310 int len;
330 311 char *cache_name;
331 312 char *id_name;
332 313
333 314 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
334 315 table->dbt_db = db;
335 316 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
336 317 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
337 318 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
338 319 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
339 320
340 321 len = strlen(tabname);
341 322 table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
342 323 cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
343 324 (void) strcpy(table->dbt_name, tabname);
344 325 (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
345 326 table->dbt_max_cache_time = max_cache_time;
346 327 table->dbt_usize = size;
347 328 table->dbt_len = hashsize;
348 329 table->dbt_count = 0;
349 330 table->dbt_idxcnt = 0;
350 331 table->dbt_ccnt = 0;
351 332 table->dbt_maxcnt = idxcnt;
352 333 table->dbt_indices = NULL;
353 334 table->dbt_id_space = NULL;
354 335 table->dbt_reaper_shutdown = FALSE;
355 336
356 337 if (start >= 0) {
357 338 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
358 339 maxentries = INT32_MAX - start;
359 340 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
360 341 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
361 342 table->dbt_id_space = id_space_create(id_name, start,
362 343 maxentries + start);
363 344 kmem_free(id_name, len + 10);
364 345 }
365 346 ASSERT(t_lowat != 0);
366 347 table->dbt_id_lwat = (maxentries * t_lowat) / 100;
367 348 ASSERT(t_hiwat != 0);
368 349 table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
369 350 table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
370 351 table->dbt_maxentries = maxentries;
371 352 table->dbt_create = create;
372 353 table->dbt_destroy = destroy;
373 354 table->dbt_expiry = expiry;
374 355
375 356 /*
376 357 * get the correct kmem_cache for this table type based on the name.
377 358 */
378 359 table->dbt_mem_cache = get_db_mem_cache(cache_name);
379 360
380 361 kmem_free(cache_name, len+13);
381 362
382 363 table->dbt_debug = db->db_debug_flags;
383 364
384 365 mutex_enter(db->db_lock);
385 366 table->dbt_tnext = db->db_tables;
386 367 db->db_tables = table;
387 368 mutex_exit(db->db_lock);
388 369
389 370 rfs4_start_reaper(table);
390 371
391 372 return (table);
392 373 }
393 374
394 375 void
395 376 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
396 377 {
397 378 rfs4_table_t *p;
398 379 rfs4_index_t *idx;
399 380
400 381 ASSERT(table->dbt_count == 0);
401 382
402 383 mutex_enter(db->db_lock);
403 384 if (table == db->db_tables)
404 385 db->db_tables = table->dbt_tnext;
405 386 else {
406 387 for (p = db->db_tables; p; p = p->dbt_tnext)
407 388 if (p->dbt_tnext == table) {
408 389 p->dbt_tnext = table->dbt_tnext;
409 390 table->dbt_tnext = NULL;
410 391 break;
411 392 }
412 393 ASSERT(p != NULL);
413 394 }
414 395 mutex_exit(db->db_lock);
415 396
416 397 /* Destroy indices */
417 398 while (table->dbt_indices) {
418 399 idx = table->dbt_indices;
419 400 table->dbt_indices = idx->dbi_inext;
420 401 rfs4_index_destroy(idx);
421 402 }
422 403
423 404 rw_destroy(table->dbt_t_lock);
424 405 mutex_destroy(table->dbt_lock);
425 406 mutex_destroy(&table->dbt_reaper_cv_lock);
426 407 cv_destroy(&table->dbt_reaper_wait);
427 408
428 409 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
429 410 if (table->dbt_id_space)
430 411 id_space_destroy(table->dbt_id_space);
431 412 table->dbt_mem_cache = NULL;
432 413 kmem_free(table, sizeof (rfs4_table_t));
433 414 }
434 415
435 416 rfs4_index_t *
436 417 rfs4_index_create(rfs4_table_t *table, char *keyname,
437 418 uint32_t (*hash)(void *),
438 419 bool_t (compare)(rfs4_entry_t, void *),
439 420 void *(*mkkey)(rfs4_entry_t),
440 421 bool_t createable)
441 422 {
442 423 rfs4_index_t *idx;
443 424
444 425 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
445 426
446 427 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
447 428
448 429 idx->dbi_table = table;
449 430 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
450 431 (void) strcpy(idx->dbi_keyname, keyname);
451 432 idx->dbi_hash = hash;
452 433 idx->dbi_compare = compare;
453 434 idx->dbi_mkkey = mkkey;
454 435 idx->dbi_tblidx = table->dbt_idxcnt;
455 436 table->dbt_idxcnt++;
456 437 if (createable) {
457 438 table->dbt_ccnt++;
458 439 if (table->dbt_ccnt > 1)
459 440 panic("Table %s currently can have only have one "
460 441 "index that will allow creation of entries",
461 442 table->dbt_name);
462 443 idx->dbi_createable = TRUE;
463 444 } else {
464 445 idx->dbi_createable = FALSE;
465 446 }
466 447
467 448 idx->dbi_inext = table->dbt_indices;
468 449 table->dbt_indices = idx;
469 450 idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
470 451 KM_SLEEP);
471 452
472 453 return (idx);
473 454 }
474 455
475 456 void
476 457 rfs4_index_destroy(rfs4_index_t *idx)
477 458 {
478 459 kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
479 460 kmem_free(idx->dbi_buckets,
480 461 sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
481 462 kmem_free(idx, sizeof (rfs4_index_t));
482 463 }
483 464
484 465 static void
485 466 rfs4_dbe_destroy(rfs4_dbe_t *entry)
486 467 {
487 468 rfs4_index_t *idx;
488 469 void *key;
489 470 int i;
490 471 rfs4_bucket_t *bp;
491 472 rfs4_table_t *table = entry->dbe_table;
492 473 rfs4_link_t *l;
493 474
494 475 NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
495 476 (CE_NOTE, "Destroying entry %p from %s",
496 477 (void*)entry, table->dbt_name));
497 478
498 479 mutex_enter(entry->dbe_lock);
499 480 ASSERT(entry->dbe_refcnt == 0);
500 481 mutex_exit(entry->dbe_lock);
501 482
502 483 /* Unlink from all indices */
503 484 for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
504 485 l = &entry->dbe_indices[idx->dbi_tblidx];
505 486 /* check and see if we were ever linked in to the index */
506 487 if (INVALID_LINK(l)) {
507 488 ASSERT(l->next == NULL && l->prev == NULL);
508 489 continue;
509 490 }
510 491 key = idx->dbi_mkkey(entry->dbe_data);
511 492 i = HASH(idx, key);
512 493 bp = &idx->dbi_buckets[i];
513 494 ASSERT(bp->dbk_head != NULL);
514 495 DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
515 496 }
516 497
517 498 /* Destroy user data */
518 499 if (table->dbt_destroy)
519 500 (*table->dbt_destroy)(entry->dbe_data);
520 501
521 502 if (table->dbt_id_space)
522 503 id_free(table->dbt_id_space, entry->dbe_id);
523 504
524 505 mutex_enter(table->dbt_lock);
525 506 table->dbt_count--;
526 507 mutex_exit(table->dbt_lock);
527 508
528 509 /* Destroy the entry itself */
529 510 kmem_cache_free(table->dbt_mem_cache, entry);
530 511 }
531 512
532 513
533 514 static rfs4_dbe_t *
534 515 rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
535 516 {
536 517 rfs4_dbe_t *entry;
537 518 int i;
538 519
539 520 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
540 521 (CE_NOTE, "Creating entry in table %s", table->dbt_name));
541 522
542 523 entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);
543 524
544 525 entry->dbe_refcnt = 1;
545 526 entry->dbe_invalid = FALSE;
546 527 entry->dbe_skipsearch = FALSE;
547 528 entry->dbe_time_rele = 0;
548 529 entry->dbe_id = 0;
549 530
550 531 if (table->dbt_id_space)
551 532 entry->dbe_id = id;
552 533 entry->dbe_table = table;
553 534
554 535 for (i = 0; i < table->dbt_maxcnt; i++) {
555 536 entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
556 537 entry->dbe_indices[i].entry = entry;
557 538 /*
558 539 * We mark the entry as not indexed by setting the low
559 540 * order bit, since addresses are word aligned. This has
560 541 * the advantage of causing a trap if the address is
561 542 * used. After the entry is linked in to the
562 543 * corresponding index the bit will be cleared.
563 544 */
564 545 INVALIDATE_ADDR(entry->dbe_indices[i].entry);
565 546 }
566 547
567 548 entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
568 549 bzero(entry->dbe_data, table->dbt_usize);
569 550 entry->dbe_data->dbe = entry;
570 551
571 552 if (!(*table->dbt_create)(entry->dbe_data, data)) {
572 553 kmem_cache_free(table->dbt_mem_cache, entry);
573 554 return (NULL);
574 555 }
575 556
576 557 mutex_enter(table->dbt_lock);
577 558 table->dbt_count++;
578 559 mutex_exit(table->dbt_lock);
579 560
580 561 return (entry);
581 562 }
582 563
583 564 static void
584 565 rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
585 566 {
586 567 clock_t tabreap;
587 568 clock_t reap_int;
588 569 uint32_t in_use;
589 570
590 571 /*
591 572 * Adjust the table's reap interval based on the
592 573 * number of id's currently in use. Each table's
593 574 * default remains the same if id usage subsides.
594 575 */
595 576 ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
596 577 tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
597 578
598 579 in_use = table->dbt_count + 1; /* see rfs4_dbe_create */
599 580 if (in_use >= table->dbt_id_hwat) {
600 581 ASSERT(t_hreap != 0);
601 582 reap_int = (tabreap * t_hreap) / 100;
602 583 } else if (in_use >= table->dbt_id_lwat) {
603 584 ASSERT(t_lreap != 0);
604 585 reap_int = (tabreap * t_lreap) / 100;
605 586 } else {
606 587 reap_int = tabreap;
607 588 }
608 589 table->dbt_id_reap = reap_int;
609 590 DTRACE_PROBE2(table__reap__interval, char *,
610 591 table->dbt_name, time_t, table->dbt_id_reap);
611 592 }
612 593
613 594 rfs4_entry_t
614 595 rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
615 596 rfs4_dbsearch_type_t dbsearch_type)
616 597 {
617 598 int already_done;
618 599 uint32_t i;
619 600 rfs4_table_t *table = idx->dbi_table;
620 601 rfs4_index_t *ip;
621 602 rfs4_bucket_t *bp;
622 603 rfs4_link_t *l;
623 604 rfs4_dbe_t *entry;
624 605 id_t id = -1;
625 606
626 607 i = HASH(idx, key);
627 608 bp = &idx->dbi_buckets[i];
628 609
629 610 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
630 611 (CE_NOTE, "Searching for key %p in table %s by %s",
631 612 key, table->dbt_name, idx->dbi_keyname));
632 613
633 614 rw_enter(bp->dbk_lock, RW_READER);
634 615 retry:
635 616 for (l = bp->dbk_head; l; l = l->next) {
636 617 if (l->entry->dbe_refcnt > 0 &&
637 618 (l->entry->dbe_skipsearch == FALSE ||
638 619 (l->entry->dbe_skipsearch == TRUE &&
639 620 dbsearch_type == RFS4_DBS_INVALID)) &&
640 621 (*idx->dbi_compare)(l->entry->dbe_data, key)) {
641 622 mutex_enter(l->entry->dbe_lock);
642 623 if (l->entry->dbe_refcnt == 0) {
643 624 mutex_exit(l->entry->dbe_lock);
644 625 continue;
645 626 }
646 627
647 628 /* place an additional hold since we are returning */
648 629 rfs4_dbe_hold(l->entry);
649 630
650 631 mutex_exit(l->entry->dbe_lock);
651 632 rw_exit(bp->dbk_lock);
652 633
653 634 *create = FALSE;
654 635
655 636 NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
656 637 (CE_NOTE, "Found entry %p for %p in table %s",
657 638 (void *)l->entry, key, table->dbt_name));
658 639
659 640 if (id != -1)
660 641 id_free(table->dbt_id_space, id);
661 642 return (l->entry->dbe_data);
662 643 }
663 644 }
664 645
665 646 if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
666 647 table->dbt_maxentries == table->dbt_count) {
667 648 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
668 649 (CE_NOTE, "Entry for %p in %s not found",
669 650 key, table->dbt_name));
670 651
671 652 rw_exit(bp->dbk_lock);
672 653 if (id != -1)
673 654 id_free(table->dbt_id_space, id);
674 655 return (NULL);
675 656 }
676 657
677 658 if (table->dbt_id_space && id == -1) {
678 659 rw_exit(bp->dbk_lock);
679 660
680 661 /* get an id, ok to sleep for it here */
681 662 id = id_alloc(table->dbt_id_space);
682 663 ASSERT(id != -1);
683 664
684 665 mutex_enter(&table->dbt_reaper_cv_lock);
685 666 rfs4_dbe_tabreap_adjust(table);
686 667 mutex_exit(&table->dbt_reaper_cv_lock);
687 668
688 669 rw_enter(bp->dbk_lock, RW_WRITER);
689 670 goto retry;
690 671 }
691 672
692 673 /* get an exclusive lock on the bucket */
693 674 if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
694 675 NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
695 676 (CE_NOTE, "Trying to upgrade lock on "
696 677 "hash chain %d (%p) for %s by %s",
697 678 i, (void*)bp, table->dbt_name, idx->dbi_keyname));
698 679
699 680 rw_exit(bp->dbk_lock);
700 681 rw_enter(bp->dbk_lock, RW_WRITER);
701 682 goto retry;
702 683 }
703 684
704 685 /* create entry */
705 686 entry = rfs4_dbe_create(table, id, arg);
706 687 if (entry == NULL) {
707 688 rw_exit(bp->dbk_lock);
708 689 if (id != -1)
709 690 id_free(table->dbt_id_space, id);
710 691
711 692 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
712 693 (CE_NOTE, "Constructor for table %s failed",
713 694 table->dbt_name));
714 695 return (NULL);
715 696 }
716 697
717 698 /*
718 699 * Add one ref for entry into table's hash - only one
719 700 * reference added even though there may be multiple indices
720 701 */
721 702 rfs4_dbe_hold(entry);
722 703 ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
723 704 VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);
724 705
725 706 already_done = idx->dbi_tblidx;
726 707 rw_exit(bp->dbk_lock);
727 708
728 709 for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
729 710 if (ip->dbi_tblidx == already_done)
730 711 continue;
731 712 l = &entry->dbe_indices[ip->dbi_tblidx];
732 713 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
733 714 ASSERT(i < ip->dbi_table->dbt_len);
734 715 bp = &ip->dbi_buckets[i];
735 716 ENQUEUE_IDX(bp, l);
736 717 }
737 718
738 719 NFS4_DEBUG(
739 720 table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
740 721 (CE_NOTE, "Entry %p created for %s = %p in table %s",
741 722 (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
742 723
743 724 return (entry->dbe_data);
744 725 }
745 726
746 727 /*ARGSUSED*/
747 728 boolean_t
748 729 rfs4_cpr_callb(void *arg, int code)
749 730 {
750 731 rfs4_bucket_t *buckets, *bp;
751 732 rfs4_link_t *l;
752 733 rfs4_client_t *cp;
753 734 int i;
754 735
755 736 nfs4_srv_t *nsrv4 = nfs4_get_srv();
756 737 rfs4_table_t *table = nsrv4->rfs4_client_tab;
757 738
758 739 /*
759 740 * We get called for Suspend and Resume events.
760 741 * For the suspend case we simply don't care! Nor do we care if
761 742 * there are no clients.
762 743 */
763 744 if (code == CB_CODE_CPR_CHKPT || table == NULL) {
764 745 return (B_TRUE);
765 746 }
766 747
767 748 buckets = table->dbt_indices->dbi_buckets;
768 749
769 750 /*
770 751 * When we get this far we are in the process of
771 752 * resuming the system from a previous suspend.
772 753 *
773 754 * We are going to blast through and update the
774 755 * last_access time for all the clients and in
775 756 * doing so extend them by one lease period.
776 757 */
777 758 for (i = 0; i < table->dbt_len; i++) {
778 759 bp = &buckets[i];
779 760 for (l = bp->dbk_head; l; l = l->next) {
780 761 cp = (rfs4_client_t *)l->entry->dbe_data;
781 762 cp->rc_last_access = gethrestime_sec();
782 763 }
783 764 }
784 765
785 766 return (B_TRUE);
786 767 }
787 768
788 769 /*
789 770 * Given a table, lock each of the buckets and walk all entries (in
790 771 * turn locking those) and calling the provided "callout" function
791 772 * with the provided parameter. Obviously used to iterate across all
792 773 * entries in a particular table via the database locking hierarchy.
793 774 * Obviously the caller must not hold locks on any of the entries in
794 775 * the specified table.
795 776 */
796 777 void
797 778 rfs4_dbe_walk(rfs4_table_t *table,
798 779 void (*callout)(rfs4_entry_t, void *),
799 780 void *data)
800 781 {
801 782 rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
802 783 rfs4_link_t *l;
803 784 rfs4_dbe_t *entry;
804 785 int i;
805 786
806 787 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
807 788 (CE_NOTE, "Walking entries in %s", table->dbt_name));
808 789
809 790 /* Walk the buckets looking for entries to release/destroy */
810 791 for (i = 0; i < table->dbt_len; i++) {
811 792 bp = &buckets[i];
812 793 rw_enter(bp->dbk_lock, RW_READER);
813 794 for (l = bp->dbk_head; l; l = l->next) {
814 795 entry = l->entry;
815 796 mutex_enter(entry->dbe_lock);
816 797 (*callout)(entry->dbe_data, data);
817 798 mutex_exit(entry->dbe_lock);
818 799 }
819 800 rw_exit(bp->dbk_lock);
820 801 }
821 802
822 803 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
823 804 (CE_NOTE, "Walking entries complete %s", table->dbt_name));
824 805 }
825 806
826 807
827 808 static void
828 809 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
829 810 {
830 811 rfs4_index_t *idx = table->dbt_indices;
831 812 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
832 813 rfs4_link_t *l, *t;
833 814 rfs4_dbe_t *entry;
|
↓ open down ↓ |
667 lines elided |
↑ open up ↑ |
834 815 bool_t found;
835 816 int i;
836 817 int count = 0;
837 818
838 819 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
839 820 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
840 821 desired, cache_time, table->dbt_name));
841 822
842 823 /* Walk the buckets looking for entries to release/destroy */
843 824 for (i = 0; i < table->dbt_len; i++) {
844 - int retries = 0;
845 825 bp = &buckets[i];
846 826 do {
847 827 found = FALSE;
848 828 rw_enter(bp->dbk_lock, RW_READER);
849 829 for (l = bp->dbk_head; l; l = l->next) {
850 830 entry = l->entry;
851 - mutex_enter(entry->dbe_lock);
852 - ASSERT(entry->dbe_refcnt != 0);
853 831 /*
854 832 * Examine an entry. Ref count of 1 means
855 833 * that the only reference is for the hash
856 834 * table reference.
857 835 */
858 - if (entry->dbe_refcnt != 1) {
859 - mutex_exit(entry->dbe_lock);
836 + if (entry->dbe_refcnt != 1)
860 837 continue;
861 - }
838 + mutex_enter(entry->dbe_lock);
862 839 if ((entry->dbe_refcnt == 1) &&
863 840 (table->dbt_reaper_shutdown ||
864 841 table->dbt_expiry == NULL ||
865 842 (*table->dbt_expiry)(entry->dbe_data))) {
866 - rfs4_dbe_rele_nolock(entry);
843 + entry->dbe_refcnt--;
867 844 count++;
868 845 found = TRUE;
869 846 }
870 847 mutex_exit(entry->dbe_lock);
871 848 }
872 849 if (found) {
873 850 if (!rw_tryupgrade(bp->dbk_lock)) {
874 851 rw_exit(bp->dbk_lock);
875 852 rw_enter(bp->dbk_lock, RW_WRITER);
876 853 }
877 854
878 855 l = bp->dbk_head;
879 856 while (l) {
880 857 t = l;
881 858 entry = t->entry;
882 859 l = l->next;
883 - mutex_enter(entry->dbe_lock);
884 860 if (entry->dbe_refcnt == 0) {
885 861 DEQUEUE(bp->dbk_head, t);
886 - mutex_exit(entry->dbe_lock);
887 862 t->next = NULL;
888 863 t->prev = NULL;
889 864 INVALIDATE_ADDR(t->entry);
890 865 rfs4_dbe_destroy(entry);
891 - } else
892 - mutex_exit(entry->dbe_lock);
866 + }
893 867 }
894 868 }
895 869 rw_exit(bp->dbk_lock);
896 870 /*
897 871 * delay slightly if there is more work to do
898 872 * with the expectation that other reaper
899 873 * threads are freeing data structures as well
900 874 * and in turn will reduce ref counts on
901 875 * entries in this table allowing them to be
902 876 * released. This is only done in the
903 877 * instance that the tables are being shut down.
904 878 */
905 - if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
879 + if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
906 880 delay(hz/100);
907 - retries++;
908 - }
909 881 /*
910 882 * If this is a table shutdown, keep going until
911 883 * everything is gone
912 884 */
913 - } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
885 + } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
914 886
915 887 if (!table->dbt_reaper_shutdown && desired && count >= desired)
916 888 break;
917 889 }
918 890
919 891 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
920 892 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
921 893 count, cache_time, table->dbt_name));
922 894 }
923 895
924 896 static void
925 897 reaper_thread(caddr_t *arg)
926 898 {
927 899 rfs4_table_t *table = (rfs4_table_t *)arg;
928 900 clock_t rc;
929 901
930 902 NFS4_DEBUG(table->dbt_debug,
931 903 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
932 904
933 905 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
934 906 callb_generic_cpr, "nfsv4Reaper");
935 907
936 908 mutex_enter(&table->dbt_reaper_cv_lock);
937 909 do {
938 910 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
939 911 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
940 912 &table->dbt_reaper_cv_lock,
941 913 SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
942 914 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
943 915 &table->dbt_reaper_cv_lock);
944 916 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
945 917 } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
946 918
947 919 CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
948 920
949 921 NFS4_DEBUG(table->dbt_debug,
950 922 (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
951 923
952 924 /* Notify the database shutdown processing that the table is shutdown */
953 925 mutex_enter(table->dbt_db->db_lock);
954 926 table->dbt_db->db_shutdown_count--;
955 927 cv_signal(&table->dbt_db->db_shutdown_wait);
956 928 mutex_exit(table->dbt_db->db_lock);
957 929 zthread_exit();
958 930 }
959 931
960 932 static void
961 933 rfs4_start_reaper(rfs4_table_t *table)
962 934 {
963 935 if (table->dbt_max_cache_time == 0)
964 936 return;
965 937
966 938 (void) zthread_create(NULL, 0, reaper_thread, table, 0,
967 939 minclsyspri);
968 940 }
969 941
970 942 #ifdef DEBUG
971 943 void
972 944 rfs4_dbe_debug(rfs4_dbe_t *entry)
973 945 {
974 946 cmn_err(CE_NOTE, "Entry %p from table %s",
975 947 (void *)entry, entry->dbe_table->dbt_name);
976 948 cmn_err(CE_CONT, "\trefcnt = %d id = %d",
977 949 entry->dbe_refcnt, entry->dbe_id);
978 950 }
979 951 #endif
|
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX