Print this page
Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_db.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_db.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright 2019 Nexenta Systems, Inc.
28 28 */
29 29
30 30 #include <sys/systm.h>
31 31 #include <sys/cmn_err.h>
32 32 #include <sys/kmem.h>
33 33 #include <sys/disp.h>
34 34 #include <sys/id_space.h>
35 35 #include <rpc/rpc.h>
36 36 #include <nfs/nfs4.h>
37 37 #include <nfs/nfs4_db_impl.h>
38 38 #include <sys/sdt.h>
39 39
40 40 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
41 41
42 42 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
43 43 static void rfs4_dbe_destroy(rfs4_dbe_t *);
44 44 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
45 45 static void rfs4_start_reaper(rfs4_table_t *);
46 46
47 47 /*
48 48 * t_lowat - integer percentage of table entries /etc/system only
49 49 * t_hiwat - integer percentage of table entries /etc/system only
50 50 * t_lreap - integer percentage of table reap time mdb or /etc/system
51 51 * t_hreap - integer percentage of table reap time mdb or /etc/system
52 52 */
53 53 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
54 54 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
55 55 time_t t_lreap = 50; /* default to 50% of table's reap interval */
56 56 time_t t_hreap = 10; /* default to 10% of table's reap interval */
57 57
58 58 id_t
59 59 rfs4_dbe_getid(rfs4_dbe_t *entry)
60 60 {
61 61 return (entry->dbe_id);
62 62 }
63 63
64 64 void
65 65 rfs4_dbe_hold(rfs4_dbe_t *entry)
66 66 {
67 67 if (!MUTEX_HELD(entry->dbe_lock)) {
68 68 mutex_enter(entry->dbe_lock);
69 69 entry->dbe_refcnt++;
70 70 mutex_exit(entry->dbe_lock);
71 71 } else {
72 72 entry->dbe_refcnt++;
73 73 }
74 74 }
75 75
76 76 /*
77 77 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
78 78 */
79 79 void
80 80 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
81 81 {
82 82 if (!MUTEX_HELD(entry->dbe_lock)) {
83 83 ASSERT(entry->dbe_refcnt > 0);
84 84 mutex_enter(entry->dbe_lock);
85 85 entry->dbe_refcnt--;
86 86 mutex_exit(entry->dbe_lock);
87 87 } else {
88 88 entry->dbe_refcnt--;
89 89 }
90 90 }
91 91
92 92
93 93 uint32_t
94 94 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
95 95 {
96 96 return (entry->dbe_refcnt);
97 97 }
98 98
99 99 /*
100 100 * Mark an entry such that the dbsearch will skip it.
101 101 * Caller does not want this entry to be found any longer
102 102 */
103 103 void
104 104 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
105 105 {
106 106 if (!MUTEX_HELD(entry->dbe_lock)) {
107 107 mutex_enter(entry->dbe_lock);
108 108 entry->dbe_invalid = TRUE;
109 109 entry->dbe_skipsearch = TRUE;
110 110 mutex_exit(entry->dbe_lock);
111 111 } else {
112 112 entry->dbe_invalid = TRUE;
113 113 entry->dbe_skipsearch = TRUE;
114 114 }
115 115 }
116 116
117 117 /*
118 118 * Is this entry invalid?
119 119 */
120 120 bool_t
121 121 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
122 122 {
123 123 return (entry->dbe_invalid);
124 124 }
125 125
126 126 time_t
127 127 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
128 128 {
129 129 return (entry->dbe_time_rele);
130 130 }
131 131
132 132 /*
133 133 * Use these to temporarily hide/unhide a db entry.
134 134 */
135 135 void
136 136 rfs4_dbe_hide(rfs4_dbe_t *entry)
137 137 {
138 138 rfs4_dbe_lock(entry);
139 139 entry->dbe_skipsearch = TRUE;
140 140 rfs4_dbe_unlock(entry);
141 141 }
142 142
143 143 void
144 144 rfs4_dbe_unhide(rfs4_dbe_t *entry)
145 145 {
146 146 rfs4_dbe_lock(entry);
147 147 entry->dbe_skipsearch = FALSE;
148 148 rfs4_dbe_unlock(entry);
149 149 }
150 150
151 151 void
152 152 rfs4_dbe_rele(rfs4_dbe_t *entry)
153 153 {
154 154 mutex_enter(entry->dbe_lock);
155 155 ASSERT(entry->dbe_refcnt > 1);
156 156 entry->dbe_refcnt--;
157 157 entry->dbe_time_rele = gethrestime_sec();
158 158 mutex_exit(entry->dbe_lock);
159 159 }
160 160
161 161 void
162 162 rfs4_dbe_lock(rfs4_dbe_t *entry)
163 163 {
164 164 mutex_enter(entry->dbe_lock);
165 165 }
166 166
167 167 void
168 168 rfs4_dbe_unlock(rfs4_dbe_t *entry)
169 169 {
170 170 mutex_exit(entry->dbe_lock);
171 171 }
172 172
173 173 bool_t
174 174 rfs4_dbe_islocked(rfs4_dbe_t *entry)
175 175 {
176 176 return (mutex_owned(entry->dbe_lock));
177 177 }
178 178
179 179 clock_t
180 180 rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
181 181 {
182 182 return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
183 183 }
184 184
185 185 void
186 186 rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
187 187 {
188 188 cv_broadcast(entry->dbe_cv);
189 189 }
190 190
191 191 /* ARGSUSED */
192 192 static int
193 193 rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag)
194 194 {
195 195 rfs4_dbe_t *entry = obj;
196 196
197 197 mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
198 198 cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
199 199
200 200 return (0);
201 201 }
202 202
203 203 static void
204 204 rfs4_dbe_kmem_destructor(void *obj, void *private)
205 205 {
206 206 rfs4_dbe_t *entry = obj;
207 207 /*LINTED*/
208 208 rfs4_table_t *table = private;
209 209
210 210 mutex_destroy(entry->dbe_lock);
211 211 cv_destroy(entry->dbe_cv);
212 212 }
213 213
214 214 rfs4_database_t *
215 215 rfs4_database_create(uint32_t flags)
216 216 {
217 217 rfs4_database_t *db;
218 218
219 219 db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
220 220 mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
221 221 db->db_tables = NULL;
222 222 db->db_debug_flags = flags;
223 223 db->db_shutdown_count = 0;
224 224 cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
225 225 return (db);
226 226 }
227 227
228 228
229 229 /*
230 230 * The reaper threads that have been created for the tables in this
231 231 * database must be stopped and the entries in the tables released.
232 232 * Each table will be marked as "shutdown" and the reaper threads
233 233 * poked and they will see that a shutdown is in progress and cleanup
234 234 * and exit. This function waits for all reaper threads to stop
235 235 * before returning to the caller.
236 236 */
237 237 void
238 238 rfs4_database_shutdown(rfs4_database_t *db)
239 239 {
240 240 rfs4_table_t *table;
241 241
242 242 mutex_enter(db->db_lock);
243 243 for (table = db->db_tables; table; table = table->dbt_tnext) {
244 244 mutex_enter(&table->dbt_reaper_cv_lock);
245 245 table->dbt_reaper_shutdown = TRUE;
246 246 cv_broadcast(&table->dbt_reaper_wait);
247 247 db->db_shutdown_count++;
248 248 mutex_exit(&table->dbt_reaper_cv_lock);
249 249 }
250 250 while (db->db_shutdown_count > 0) {
251 251 cv_wait(&db->db_shutdown_wait, db->db_lock);
252 252 }
253 253 mutex_exit(db->db_lock);
254 254 }
255 255
256 256 /*
257 257 * Given a database that has been "shutdown" by the function above all
258 258 * of the table tables are destroyed and then the database itself
259 259 * freed.
260 260 */
261 261 void
262 262 rfs4_database_destroy(rfs4_database_t *db)
263 263 {
264 264 rfs4_table_t *next, *tmp;
265 265
266 266 for (next = db->db_tables; next; ) {
267 267 tmp = next;
268 268 next = tmp->dbt_tnext;
269 269 rfs4_table_destroy(db, tmp);
270 270 }
271 271
272 272 mutex_destroy(db->db_lock);
273 273 kmem_free(db, sizeof (rfs4_database_t));
274 274 }
275 275
276 276 /*
277 277 * Used to get the correct kmem_cache database for the state table being
278 278 * created.
279 279 * Helper function for rfs4_table_create
280 280 */
281 281 static kmem_cache_t *
282 282 get_db_mem_cache(char *name)
283 283 {
284 284 int i;
285 285
286 286 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
287 287 if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
288 288 return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
289 289 }
290 290 /*
291 291 * There is no associated kmem cache for this NFS4 server state
292 292 * table name
293 293 */
294 294 return (NULL);
295 295 }
296 296
297 297 /*
298 298 * Used to initialize the global NFSv4 server state database.
299 299 * Helper funtion for rfs4_state_g_init and called when module is loaded.
300 300 */
301 301 kmem_cache_t *
302 302 /* CSTYLED */
303 303 nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
304 304 {
305 305 kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
306 306 sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
307 307 0,
308 308 rfs4_dbe_kmem_constructor,
309 309 rfs4_dbe_kmem_destructor,
310 310 NULL,
311 311 NULL,
312 312 NULL,
313 313 0);
314 314 (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
315 315 strlen(cache_name) + 1);
316 316 rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
317 317 return (mem_cache);
318 318 }
319 319
320 320 rfs4_table_t *
321 321 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
322 322 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
323 323 void (*destroy)(rfs4_entry_t),
324 324 bool_t (*expiry)(rfs4_entry_t),
325 325 uint32_t size, uint32_t hashsize,
326 326 uint32_t maxentries, id_t start)
327 327 {
328 328 rfs4_table_t *table;
329 329 int len;
330 330 char *cache_name;
331 331 char *id_name;
332 332
333 333 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
334 334 table->dbt_db = db;
335 335 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
336 336 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
337 337 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
338 338 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
339 339
340 340 len = strlen(tabname);
341 341 table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
342 342 cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
343 343 (void) strcpy(table->dbt_name, tabname);
344 344 (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
345 345 table->dbt_max_cache_time = max_cache_time;
346 346 table->dbt_usize = size;
347 347 table->dbt_len = hashsize;
348 348 table->dbt_count = 0;
349 349 table->dbt_idxcnt = 0;
350 350 table->dbt_ccnt = 0;
351 351 table->dbt_maxcnt = idxcnt;
352 352 table->dbt_indices = NULL;
353 353 table->dbt_id_space = NULL;
354 354 table->dbt_reaper_shutdown = FALSE;
355 355
356 356 if (start >= 0) {
357 357 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
358 358 maxentries = INT32_MAX - start;
359 359 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
360 360 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
361 361 table->dbt_id_space = id_space_create(id_name, start,
362 362 maxentries + start);
363 363 kmem_free(id_name, len + 10);
364 364 }
365 365 ASSERT(t_lowat != 0);
366 366 table->dbt_id_lwat = (maxentries * t_lowat) / 100;
367 367 ASSERT(t_hiwat != 0);
368 368 table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
369 369 table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
370 370 table->dbt_maxentries = maxentries;
371 371 table->dbt_create = create;
372 372 table->dbt_destroy = destroy;
373 373 table->dbt_expiry = expiry;
374 374
375 375 /*
376 376 * get the correct kmem_cache for this table type based on the name.
377 377 */
378 378 table->dbt_mem_cache = get_db_mem_cache(cache_name);
379 379
380 380 kmem_free(cache_name, len+13);
381 381
382 382 table->dbt_debug = db->db_debug_flags;
383 383
384 384 mutex_enter(db->db_lock);
385 385 table->dbt_tnext = db->db_tables;
386 386 db->db_tables = table;
387 387 mutex_exit(db->db_lock);
388 388
389 389 rfs4_start_reaper(table);
390 390
391 391 return (table);
392 392 }
393 393
394 394 void
395 395 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
396 396 {
397 397 rfs4_table_t *p;
398 398 rfs4_index_t *idx;
399 399
400 400 ASSERT(table->dbt_count == 0);
401 401
402 402 mutex_enter(db->db_lock);
403 403 if (table == db->db_tables)
404 404 db->db_tables = table->dbt_tnext;
405 405 else {
406 406 for (p = db->db_tables; p; p = p->dbt_tnext)
407 407 if (p->dbt_tnext == table) {
408 408 p->dbt_tnext = table->dbt_tnext;
409 409 table->dbt_tnext = NULL;
410 410 break;
411 411 }
412 412 ASSERT(p != NULL);
413 413 }
414 414 mutex_exit(db->db_lock);
415 415
416 416 /* Destroy indices */
417 417 while (table->dbt_indices) {
418 418 idx = table->dbt_indices;
419 419 table->dbt_indices = idx->dbi_inext;
420 420 rfs4_index_destroy(idx);
421 421 }
422 422
423 423 rw_destroy(table->dbt_t_lock);
424 424 mutex_destroy(table->dbt_lock);
425 425 mutex_destroy(&table->dbt_reaper_cv_lock);
426 426 cv_destroy(&table->dbt_reaper_wait);
427 427
428 428 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
429 429 if (table->dbt_id_space)
430 430 id_space_destroy(table->dbt_id_space);
431 431 table->dbt_mem_cache = NULL;
432 432 kmem_free(table, sizeof (rfs4_table_t));
433 433 }
434 434
435 435 rfs4_index_t *
436 436 rfs4_index_create(rfs4_table_t *table, char *keyname,
437 437 uint32_t (*hash)(void *),
438 438 bool_t (compare)(rfs4_entry_t, void *),
439 439 void *(*mkkey)(rfs4_entry_t),
440 440 bool_t createable)
441 441 {
442 442 rfs4_index_t *idx;
443 443
444 444 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
445 445
446 446 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
447 447
448 448 idx->dbi_table = table;
449 449 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
450 450 (void) strcpy(idx->dbi_keyname, keyname);
451 451 idx->dbi_hash = hash;
452 452 idx->dbi_compare = compare;
453 453 idx->dbi_mkkey = mkkey;
454 454 idx->dbi_tblidx = table->dbt_idxcnt;
455 455 table->dbt_idxcnt++;
456 456 if (createable) {
457 457 table->dbt_ccnt++;
458 458 if (table->dbt_ccnt > 1)
459 459 panic("Table %s currently can have only have one "
460 460 "index that will allow creation of entries",
461 461 table->dbt_name);
462 462 idx->dbi_createable = TRUE;
463 463 } else {
464 464 idx->dbi_createable = FALSE;
465 465 }
466 466
467 467 idx->dbi_inext = table->dbt_indices;
468 468 table->dbt_indices = idx;
469 469 idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
470 470 KM_SLEEP);
471 471
472 472 return (idx);
473 473 }
474 474
475 475 void
476 476 rfs4_index_destroy(rfs4_index_t *idx)
477 477 {
478 478 kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
479 479 kmem_free(idx->dbi_buckets,
480 480 sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
481 481 kmem_free(idx, sizeof (rfs4_index_t));
482 482 }
483 483
484 484 static void
485 485 rfs4_dbe_destroy(rfs4_dbe_t *entry)
486 486 {
487 487 rfs4_index_t *idx;
488 488 void *key;
489 489 int i;
490 490 rfs4_bucket_t *bp;
491 491 rfs4_table_t *table = entry->dbe_table;
492 492 rfs4_link_t *l;
493 493
494 494 NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
495 495 (CE_NOTE, "Destroying entry %p from %s",
496 496 (void*)entry, table->dbt_name));
497 497
498 498 mutex_enter(entry->dbe_lock);
499 499 ASSERT(entry->dbe_refcnt == 0);
500 500 mutex_exit(entry->dbe_lock);
501 501
502 502 /* Unlink from all indices */
503 503 for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
504 504 l = &entry->dbe_indices[idx->dbi_tblidx];
505 505 /* check and see if we were ever linked in to the index */
506 506 if (INVALID_LINK(l)) {
507 507 ASSERT(l->next == NULL && l->prev == NULL);
508 508 continue;
509 509 }
510 510 key = idx->dbi_mkkey(entry->dbe_data);
511 511 i = HASH(idx, key);
512 512 bp = &idx->dbi_buckets[i];
513 513 ASSERT(bp->dbk_head != NULL);
514 514 DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
515 515 }
516 516
517 517 /* Destroy user data */
518 518 if (table->dbt_destroy)
519 519 (*table->dbt_destroy)(entry->dbe_data);
520 520
521 521 if (table->dbt_id_space)
522 522 id_free(table->dbt_id_space, entry->dbe_id);
523 523
524 524 mutex_enter(table->dbt_lock);
525 525 table->dbt_count--;
526 526 mutex_exit(table->dbt_lock);
527 527
528 528 /* Destroy the entry itself */
529 529 kmem_cache_free(table->dbt_mem_cache, entry);
530 530 }
531 531
532 532
533 533 static rfs4_dbe_t *
534 534 rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
535 535 {
536 536 rfs4_dbe_t *entry;
537 537 int i;
538 538
539 539 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
540 540 (CE_NOTE, "Creating entry in table %s", table->dbt_name));
541 541
542 542 entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);
543 543
544 544 entry->dbe_refcnt = 1;
545 545 entry->dbe_invalid = FALSE;
546 546 entry->dbe_skipsearch = FALSE;
547 547 entry->dbe_time_rele = 0;
548 548 entry->dbe_id = 0;
549 549
550 550 if (table->dbt_id_space)
551 551 entry->dbe_id = id;
552 552 entry->dbe_table = table;
553 553
554 554 for (i = 0; i < table->dbt_maxcnt; i++) {
555 555 entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
556 556 entry->dbe_indices[i].entry = entry;
557 557 /*
558 558 * We mark the entry as not indexed by setting the low
559 559 * order bit, since address are word aligned. This has
560 560 * the advantage of causeing a trap if the address is
561 561 * used. After the entry is linked in to the
562 562 * corresponding index the bit will be cleared.
563 563 */
564 564 INVALIDATE_ADDR(entry->dbe_indices[i].entry);
565 565 }
566 566
567 567 entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
568 568 bzero(entry->dbe_data, table->dbt_usize);
569 569 entry->dbe_data->dbe = entry;
570 570
571 571 if (!(*table->dbt_create)(entry->dbe_data, data)) {
572 572 kmem_cache_free(table->dbt_mem_cache, entry);
573 573 return (NULL);
574 574 }
575 575
576 576 mutex_enter(table->dbt_lock);
577 577 table->dbt_count++;
578 578 mutex_exit(table->dbt_lock);
579 579
580 580 return (entry);
581 581 }
582 582
583 583 static void
584 584 rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
585 585 {
586 586 clock_t tabreap;
587 587 clock_t reap_int;
588 588 uint32_t in_use;
589 589
590 590 /*
591 591 * Adjust the table's reap interval based on the
592 592 * number of id's currently in use. Each table's
593 593 * default remains the same if id usage subsides.
594 594 */
595 595 ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
596 596 tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
597 597
598 598 in_use = table->dbt_count + 1; /* see rfs4_dbe_create */
599 599 if (in_use >= table->dbt_id_hwat) {
600 600 ASSERT(t_hreap != 0);
601 601 reap_int = (tabreap * t_hreap) / 100;
602 602 } else if (in_use >= table->dbt_id_lwat) {
603 603 ASSERT(t_lreap != 0);
604 604 reap_int = (tabreap * t_lreap) / 100;
605 605 } else {
606 606 reap_int = tabreap;
607 607 }
608 608 table->dbt_id_reap = reap_int;
609 609 DTRACE_PROBE2(table__reap__interval, char *,
610 610 table->dbt_name, time_t, table->dbt_id_reap);
611 611 }
612 612
613 613 rfs4_entry_t
614 614 rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
615 615 rfs4_dbsearch_type_t dbsearch_type)
616 616 {
617 617 int already_done;
618 618 uint32_t i;
619 619 rfs4_table_t *table = idx->dbi_table;
620 620 rfs4_index_t *ip;
621 621 rfs4_bucket_t *bp;
622 622 rfs4_link_t *l;
623 623 rfs4_dbe_t *entry;
624 624 id_t id = -1;
625 625
626 626 i = HASH(idx, key);
627 627 bp = &idx->dbi_buckets[i];
628 628
629 629 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
630 630 (CE_NOTE, "Searching for key %p in table %s by %s",
631 631 key, table->dbt_name, idx->dbi_keyname));
632 632
633 633 rw_enter(bp->dbk_lock, RW_READER);
634 634 retry:
635 635 for (l = bp->dbk_head; l; l = l->next) {
636 636 if (l->entry->dbe_refcnt > 0 &&
637 637 (l->entry->dbe_skipsearch == FALSE ||
638 638 (l->entry->dbe_skipsearch == TRUE &&
639 639 dbsearch_type == RFS4_DBS_INVALID)) &&
640 640 (*idx->dbi_compare)(l->entry->dbe_data, key)) {
641 641 mutex_enter(l->entry->dbe_lock);
642 642 if (l->entry->dbe_refcnt == 0) {
643 643 mutex_exit(l->entry->dbe_lock);
644 644 continue;
645 645 }
646 646
647 647 /* place an additional hold since we are returning */
648 648 rfs4_dbe_hold(l->entry);
649 649
650 650 mutex_exit(l->entry->dbe_lock);
651 651 rw_exit(bp->dbk_lock);
652 652
653 653 *create = FALSE;
654 654
655 655 NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
656 656 (CE_NOTE, "Found entry %p for %p in table %s",
657 657 (void *)l->entry, key, table->dbt_name));
658 658
659 659 if (id != -1)
660 660 id_free(table->dbt_id_space, id);
661 661 return (l->entry->dbe_data);
662 662 }
663 663 }
664 664
665 665 if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
666 666 table->dbt_maxentries == table->dbt_count) {
667 667 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
668 668 (CE_NOTE, "Entry for %p in %s not found",
669 669 key, table->dbt_name));
670 670
671 671 rw_exit(bp->dbk_lock);
672 672 if (id != -1)
673 673 id_free(table->dbt_id_space, id);
674 674 return (NULL);
675 675 }
676 676
677 677 if (table->dbt_id_space && id == -1) {
678 678 rw_exit(bp->dbk_lock);
679 679
680 680 /* get an id, ok to sleep for it here */
681 681 id = id_alloc(table->dbt_id_space);
682 682 ASSERT(id != -1);
683 683
684 684 mutex_enter(&table->dbt_reaper_cv_lock);
685 685 rfs4_dbe_tabreap_adjust(table);
686 686 mutex_exit(&table->dbt_reaper_cv_lock);
687 687
688 688 rw_enter(bp->dbk_lock, RW_WRITER);
689 689 goto retry;
690 690 }
691 691
692 692 /* get an exclusive lock on the bucket */
693 693 if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
694 694 NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
695 695 (CE_NOTE, "Trying to upgrade lock on "
696 696 "hash chain %d (%p) for %s by %s",
697 697 i, (void*)bp, table->dbt_name, idx->dbi_keyname));
698 698
699 699 rw_exit(bp->dbk_lock);
700 700 rw_enter(bp->dbk_lock, RW_WRITER);
701 701 goto retry;
702 702 }
703 703
704 704 /* create entry */
705 705 entry = rfs4_dbe_create(table, id, arg);
706 706 if (entry == NULL) {
707 707 rw_exit(bp->dbk_lock);
708 708 if (id != -1)
709 709 id_free(table->dbt_id_space, id);
710 710
711 711 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
712 712 (CE_NOTE, "Constructor for table %s failed",
713 713 table->dbt_name));
714 714 return (NULL);
715 715 }
716 716
717 717 /*
718 718 * Add one ref for entry into table's hash - only one
719 719 * reference added even though there may be multiple indices
720 720 */
721 721 rfs4_dbe_hold(entry);
722 722 ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
723 723 VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);
724 724
725 725 already_done = idx->dbi_tblidx;
726 726 rw_exit(bp->dbk_lock);
727 727
728 728 for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
729 729 if (ip->dbi_tblidx == already_done)
730 730 continue;
731 731 l = &entry->dbe_indices[ip->dbi_tblidx];
732 732 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
733 733 ASSERT(i < ip->dbi_table->dbt_len);
734 734 bp = &ip->dbi_buckets[i];
735 735 ENQUEUE_IDX(bp, l);
736 736 }
737 737
738 738 NFS4_DEBUG(
739 739 table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
740 740 (CE_NOTE, "Entry %p created for %s = %p in table %s",
741 741 (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
742 742
743 743 return (entry->dbe_data);
744 744 }
|
↓ open down ↓ |
744 lines elided |
↑ open up ↑ |
745 745
746 746 /*ARGSUSED*/
747 747 boolean_t
748 748 rfs4_cpr_callb(void *arg, int code)
749 749 {
750 750 rfs4_bucket_t *buckets, *bp;
751 751 rfs4_link_t *l;
752 752 rfs4_client_t *cp;
753 753 int i;
754 754
755 - nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
755 + nfs4_srv_t *nsrv4 = nfs4_get_srv();
756 756 rfs4_table_t *table = nsrv4->rfs4_client_tab;
757 757
758 758 /*
759 759 * We get called for Suspend and Resume events.
760 760 * For the suspend case we simply don't care! Nor do we care if
761 761 * there are no clients.
762 762 */
763 763 if (code == CB_CODE_CPR_CHKPT || table == NULL) {
764 764 return (B_TRUE);
765 765 }
766 766
767 767 buckets = table->dbt_indices->dbi_buckets;
768 768
769 769 /*
770 770 * When we get this far we are in the process of
771 771 * resuming the system from a previous suspend.
772 772 *
773 773 * We are going to blast through and update the
774 774 * last_access time for all the clients and in
775 775 * doing so extend them by one lease period.
776 776 */
777 777 for (i = 0; i < table->dbt_len; i++) {
778 778 bp = &buckets[i];
779 779 for (l = bp->dbk_head; l; l = l->next) {
780 780 cp = (rfs4_client_t *)l->entry->dbe_data;
781 781 cp->rc_last_access = gethrestime_sec();
782 782 }
783 783 }
784 784
785 785 return (B_TRUE);
786 786 }
787 787
788 788 /*
789 789 * Given a table, lock each of the buckets and walk all entries (in
790 790 * turn locking those) and calling the provided "callout" function
791 791 * with the provided parameter. Obviously used to iterate across all
792 792 * entries in a particular table via the database locking hierarchy.
793 793 * Obviously the caller must not hold locks on any of the entries in
794 794 * the specified table.
795 795 */
796 796 void
797 797 rfs4_dbe_walk(rfs4_table_t *table,
798 798 void (*callout)(rfs4_entry_t, void *),
799 799 void *data)
800 800 {
801 801 rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
802 802 rfs4_link_t *l;
803 803 rfs4_dbe_t *entry;
804 804 int i;
805 805
806 806 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
807 807 (CE_NOTE, "Walking entries in %s", table->dbt_name));
808 808
809 809 /* Walk the buckets looking for entries to release/destroy */
810 810 for (i = 0; i < table->dbt_len; i++) {
811 811 bp = &buckets[i];
812 812 rw_enter(bp->dbk_lock, RW_READER);
813 813 for (l = bp->dbk_head; l; l = l->next) {
814 814 entry = l->entry;
815 815 mutex_enter(entry->dbe_lock);
816 816 (*callout)(entry->dbe_data, data);
817 817 mutex_exit(entry->dbe_lock);
818 818 }
819 819 rw_exit(bp->dbk_lock);
820 820 }
821 821
822 822 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
823 823 (CE_NOTE, "Walking entries complete %s", table->dbt_name));
824 824 }
825 825
826 826
827 827 static void
828 828 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
829 829 {
830 830 rfs4_index_t *idx = table->dbt_indices;
831 831 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
832 832 rfs4_link_t *l, *t;
833 833 rfs4_dbe_t *entry;
834 834 bool_t found;
835 835 int i;
836 836 int count = 0;
837 837
838 838 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
839 839 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
840 840 desired, cache_time, table->dbt_name));
841 841
842 842 /* Walk the buckets looking for entries to release/destroy */
843 843 for (i = 0; i < table->dbt_len; i++) {
844 844 int retries = 0;
845 845 bp = &buckets[i];
846 846 do {
847 847 found = FALSE;
848 848 rw_enter(bp->dbk_lock, RW_READER);
849 849 for (l = bp->dbk_head; l; l = l->next) {
850 850 entry = l->entry;
851 851 mutex_enter(entry->dbe_lock);
852 852 ASSERT(entry->dbe_refcnt != 0);
853 853 /*
854 854 * Examine an entry. Ref count of 1 means
855 855 * that the only reference is for the hash
856 856 * table reference.
857 857 */
858 858 if (entry->dbe_refcnt != 1) {
859 859 #ifdef DEBUG
860 860 rfs4_dbe_debug(entry);
861 861 #endif
862 862 mutex_exit(entry->dbe_lock);
863 863 continue;
864 864 }
865 865 if ((entry->dbe_refcnt == 1) &&
866 866 (table->dbt_reaper_shutdown ||
867 867 table->dbt_expiry == NULL ||
868 868 (*table->dbt_expiry)(entry->dbe_data))) {
869 869 rfs4_dbe_rele_nolock(entry);
870 870 count++;
871 871 found = TRUE;
872 872 }
873 873 mutex_exit(entry->dbe_lock);
874 874 }
875 875 if (found) {
876 876 if (!rw_tryupgrade(bp->dbk_lock)) {
877 877 rw_exit(bp->dbk_lock);
878 878 rw_enter(bp->dbk_lock, RW_WRITER);
879 879 }
880 880
881 881 l = bp->dbk_head;
882 882 while (l) {
883 883 t = l;
884 884 entry = t->entry;
885 885 l = l->next;
886 886 mutex_enter(entry->dbe_lock);
887 887 if (entry->dbe_refcnt == 0) {
888 888 DEQUEUE(bp->dbk_head, t);
889 889 mutex_exit(entry->dbe_lock);
890 890 t->next = NULL;
891 891 t->prev = NULL;
892 892 INVALIDATE_ADDR(t->entry);
893 893 rfs4_dbe_destroy(entry);
894 894 } else
895 895 mutex_exit(entry->dbe_lock);
896 896 }
897 897 }
898 898 rw_exit(bp->dbk_lock);
899 899 /*
900 900 * delay slightly if there is more work to do
901 901 * with the expectation that other reaper
902 902 * threads are freeing data structures as well
903 903 * and in turn will reduce ref counts on
904 904 * entries in this table allowing them to be
905 905 * released. This is only done in the
906 906 * instance that the tables are being shut down.
907 907 */
908 908 if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
909 909 delay(hz/100);
910 910 retries++;
911 911 }
912 912 /*
913 913 * If this is a table shutdown, keep going until
914 914 * everything is gone
915 915 */
916 916 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
917 917
918 918 if (!table->dbt_reaper_shutdown && desired && count >= desired)
919 919 break;
920 920 }
921 921
922 922 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
923 923 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
924 924 count, cache_time, table->dbt_name));
925 925 }
926 926
927 927 static void
928 928 reaper_thread(caddr_t *arg)
929 929 {
930 930 rfs4_table_t *table = (rfs4_table_t *)arg;
931 931 clock_t rc;
932 932
933 933 NFS4_DEBUG(table->dbt_debug,
934 934 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
935 935
936 936 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
937 937 callb_generic_cpr, "nfsv4Reaper");
938 938
939 939 mutex_enter(&table->dbt_reaper_cv_lock);
940 940 do {
941 941 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
942 942 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
943 943 &table->dbt_reaper_cv_lock,
944 944 SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
945 945 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
946 946 &table->dbt_reaper_cv_lock);
947 947 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
948 948 } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
949 949
950 950 CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
951 951
952 952 NFS4_DEBUG(table->dbt_debug,
953 953 (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
954 954
955 955 /* Notify the database shutdown processing that the table is shutdown */
956 956 mutex_enter(table->dbt_db->db_lock);
957 957 table->dbt_db->db_shutdown_count--;
958 958 cv_signal(&table->dbt_db->db_shutdown_wait);
959 959 mutex_exit(table->dbt_db->db_lock);
960 960 zthread_exit();
961 961 }
962 962
963 963 static void
964 964 rfs4_start_reaper(rfs4_table_t *table)
965 965 {
966 966 if (table->dbt_max_cache_time == 0)
967 967 return;
968 968
969 969 (void) zthread_create(NULL, 0, reaper_thread, table, 0,
970 970 minclsyspri);
971 971 }
972 972
#ifdef DEBUG
/*
 * Log an entry's address, owning table name, reference count and id.
 * Only built in DEBUG kernels.
 */
void
rfs4_dbe_debug(rfs4_dbe_t *entry)
{
	rfs4_table_t *owner = entry->dbe_table;

	cmn_err(CE_NOTE, "Entry %p from table %s",
	    (void *)entry, owner->dbt_name);
	cmn_err(CE_CONT, "\trefcnt = %d id = %d",
	    entry->dbe_refcnt, entry->dbe_id);
}
#endif
|
↓ open down ↓ |
217 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX