Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_db.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_db.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 +
21 22 /*
22 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 24 */
24 25
26 +/*
27 + * Copyright 2018 Nexenta Systems, Inc.
28 + */
29 +
25 30 #include <sys/systm.h>
26 31 #include <sys/cmn_err.h>
27 32 #include <sys/kmem.h>
28 33 #include <sys/disp.h>
29 34 #include <sys/id_space.h>
30 35 #include <sys/atomic.h>
31 36 #include <rpc/rpc.h>
32 37 #include <nfs/nfs4.h>
33 38 #include <nfs/nfs4_db_impl.h>
34 39 #include <sys/sdt.h>
35 40
/*
 * Default reap interval.  rfs4_table_create() and
 * rfs4_dbe_tabreap_adjust() take the smaller of this value and the
 * table's max_cache_time when computing a table's reap interval.
 */
static int rfs4_reap_interval = RFS4_REAP_INTERVAL;

/* Forward declarations of file-local helpers defined further down. */
static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
static void rfs4_dbe_destroy(rfs4_dbe_t *);
static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
static void rfs4_start_reaper(rfs4_table_t *);

/*
 * t_lowat - integer percentage of table entries	/etc/system only
 * t_hiwat - integer percentage of table entries	/etc/system only
 * t_lreap - integer percentage of table reap time	mdb or /etc/system
 * t_hreap - integer percentage of table reap time	mdb or /etc/system
 *
 * t_lowat and t_hiwat are applied to a table's maxentries in
 * rfs4_table_create() to derive dbt_id_lwat and dbt_id_hwat; t_lreap and
 * t_hreap scale the reap interval in rfs4_dbe_tabreap_adjust().  All four
 * are ASSERTed to be non-zero where they are used.
 */
uint32_t	t_lowat = 50;	/* reap at t_lreap when id's in use hit 50% */
uint32_t	t_hiwat = 75;	/* reap at t_hreap when id's in use hit 75% */
time_t		t_lreap = 50;	/* default to 50% of table's reap interval */
time_t		t_hreap = 10;	/* default to 10% of table's reap interval */
53 58
54 59 id_t
55 60 rfs4_dbe_getid(rfs4_dbe_t *entry)
56 61 {
57 62 return (entry->dbe_id);
58 63 }
59 64
60 65 void
61 66 rfs4_dbe_hold(rfs4_dbe_t *entry)
62 67 {
63 68 atomic_inc_32(&entry->dbe_refcnt);
64 69 }
65 70
66 71 /*
67 72 * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
68 73 */
69 74 void
70 75 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
71 76 {
72 77 atomic_dec_32(&entry->dbe_refcnt);
73 78 }
74 79
75 80
76 81 uint32_t
77 82 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
78 83 {
79 84 return (entry->dbe_refcnt);
80 85 }
81 86
/*
 * Mark an entry such that the dbsearch will skip it.
 * Caller does not want this entry to be found any longer
 *
 * Both the invalid flag and the skipsearch flag are set.  rfs4_dbsearch()
 * still matches skipsearch entries when it is called with
 * RFS4_DBS_INVALID.  No lock is taken here.
 */
void
rfs4_dbe_invalidate(rfs4_dbe_t *entry)
{
	entry->dbe_invalid = TRUE;
	entry->dbe_skipsearch = TRUE;
}
92 97
93 98 /*
94 99 * Is this entry invalid?
95 100 */
96 101 bool_t
97 102 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
98 103 {
99 104 return (entry->dbe_invalid);
100 105 }
101 106
102 107 time_t
103 108 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
104 109 {
105 110 return (entry->dbe_time_rele);
106 111 }
107 112
108 113 /*
109 114 * Use these to temporarily hide/unhide a db entry.
110 115 */
111 116 void
112 117 rfs4_dbe_hide(rfs4_dbe_t *entry)
113 118 {
114 119 rfs4_dbe_lock(entry);
115 120 entry->dbe_skipsearch = TRUE;
116 121 rfs4_dbe_unlock(entry);
117 122 }
118 123
119 124 void
120 125 rfs4_dbe_unhide(rfs4_dbe_t *entry)
121 126 {
122 127 rfs4_dbe_lock(entry);
123 128 entry->dbe_skipsearch = FALSE;
124 129 rfs4_dbe_unlock(entry);
125 130 }
126 131
127 132 void
128 133 rfs4_dbe_rele(rfs4_dbe_t *entry)
129 134 {
130 135 mutex_enter(entry->dbe_lock);
131 136 ASSERT(entry->dbe_refcnt > 1);
132 137 atomic_dec_32(&entry->dbe_refcnt);
133 138 entry->dbe_time_rele = gethrestime_sec();
134 139 mutex_exit(entry->dbe_lock);
135 140 }
136 141
137 142 void
138 143 rfs4_dbe_lock(rfs4_dbe_t *entry)
139 144 {
140 145 mutex_enter(entry->dbe_lock);
141 146 }
142 147
143 148 void
144 149 rfs4_dbe_unlock(rfs4_dbe_t *entry)
145 150 {
146 151 mutex_exit(entry->dbe_lock);
147 152 }
148 153
149 154 bool_t
150 155 rfs4_dbe_islocked(rfs4_dbe_t *entry)
151 156 {
152 157 return (mutex_owned(entry->dbe_lock));
153 158 }
154 159
155 160 clock_t
156 161 rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
157 162 {
158 163 return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
159 164 }
160 165
161 166 void
162 167 rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
163 168 {
164 169 cv_broadcast(entry->dbe_cv);
165 170 }
166 171
167 172 /* ARGSUSED */
168 173 static int
169 174 rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag)
170 175 {
171 176 rfs4_dbe_t *entry = obj;
172 177
173 178 mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
174 179 cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
175 180
176 181 return (0);
177 182 }
178 183
179 184 static void
180 185 rfs4_dbe_kmem_destructor(void *obj, void *private)
181 186 {
182 187 rfs4_dbe_t *entry = obj;
183 188 /*LINTED*/
184 189 rfs4_table_t *table = private;
185 190
186 191 mutex_destroy(entry->dbe_lock);
187 192 cv_destroy(entry->dbe_cv);
188 193 }
189 194
190 195 rfs4_database_t *
191 196 rfs4_database_create(uint32_t flags)
192 197 {
193 198 rfs4_database_t *db;
194 199
195 200 db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
196 201 mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
197 202 db->db_tables = NULL;
198 203 db->db_debug_flags = flags;
199 204 db->db_shutdown_count = 0;
200 205 cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
201 206 return (db);
202 207 }
203 208
204 209
205 210 /*
206 211 * The reaper threads that have been created for the tables in this
207 212 * database must be stopped and the entries in the tables released.
208 213 * Each table will be marked as "shutdown" and the reaper threads
209 214 * poked and they will see that a shutdown is in progress and cleanup
210 215 * and exit. This function waits for all reaper threads to stop
211 216 * before returning to the caller.
212 217 */
213 218 void
214 219 rfs4_database_shutdown(rfs4_database_t *db)
215 220 {
216 221 rfs4_table_t *table;
217 222
218 223 mutex_enter(db->db_lock);
219 224 for (table = db->db_tables; table; table = table->dbt_tnext) {
220 225 mutex_enter(&table->dbt_reaper_cv_lock);
221 226 table->dbt_reaper_shutdown = TRUE;
222 227 cv_broadcast(&table->dbt_reaper_wait);
223 228 db->db_shutdown_count++;
224 229 mutex_exit(&table->dbt_reaper_cv_lock);
225 230 }
226 231 while (db->db_shutdown_count > 0) {
227 232 cv_wait(&db->db_shutdown_wait, db->db_lock);
228 233 }
229 234 mutex_exit(db->db_lock);
230 235 }
231 236
232 237 /*
233 238 * Given a database that has been "shutdown" by the function above all
234 239 * of the table tables are destroyed and then the database itself
235 240 * freed.
236 241 */
237 242 void
238 243 rfs4_database_destroy(rfs4_database_t *db)
239 244 {
240 245 rfs4_table_t *next, *tmp;
241 246
|
↓ open down ↓ |
207 lines elided |
↑ open up ↑ |
242 247 for (next = db->db_tables; next; ) {
243 248 tmp = next;
244 249 next = tmp->dbt_tnext;
245 250 rfs4_table_destroy(db, tmp);
246 251 }
247 252
248 253 mutex_destroy(db->db_lock);
249 254 kmem_free(db, sizeof (rfs4_database_t));
250 255 }
251 256
257 +/*
258 + * Used to get the correct kmem_cache database for the state table being
259 + * created.
260 + * Helper function for rfs4_table_create
261 + */
262 +static kmem_cache_t *
263 +get_db_mem_cache(char *name)
264 +{
265 + int i;
266 +
267 + for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
268 + if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
269 + return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
270 + }
271 + /*
272 + * There is no associated kmem cache for this NFS4 server state
273 + * table name
274 + */
275 + return (NULL);
276 +}
277 +
278 +/*
279 + * Used to initialize the global NFSv4 server state database.
280 + * Helper funtion for rfs4_state_g_init and called when module is loaded.
281 + */
282 +kmem_cache_t *
283 +/* CSTYLED */
284 +nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
285 +{
286 + kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
287 + sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
288 + 0,
289 + rfs4_dbe_kmem_constructor,
290 + rfs4_dbe_kmem_destructor,
291 + NULL,
292 + NULL,
293 + NULL,
294 + 0);
295 + (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
296 + strlen(cache_name) + 1);
297 + rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
298 + return (mem_cache);
299 +}
300 +
252 301 rfs4_table_t *
253 302 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
254 303 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
255 304 void (*destroy)(rfs4_entry_t),
256 305 bool_t (*expiry)(rfs4_entry_t),
257 306 uint32_t size, uint32_t hashsize,
258 307 uint32_t maxentries, id_t start)
259 308 {
260 309 rfs4_table_t *table;
261 310 int len;
262 311 char *cache_name;
263 312 char *id_name;
264 313
265 314 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
266 315 table->dbt_db = db;
267 316 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
268 317 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
269 318 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
270 319 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
271 320
272 321 len = strlen(tabname);
273 322 table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
274 323 cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
275 324 (void) strcpy(table->dbt_name, tabname);
276 325 (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
277 326 table->dbt_max_cache_time = max_cache_time;
278 327 table->dbt_usize = size;
279 328 table->dbt_len = hashsize;
280 329 table->dbt_count = 0;
281 330 table->dbt_idxcnt = 0;
282 331 table->dbt_ccnt = 0;
283 332 table->dbt_maxcnt = idxcnt;
284 333 table->dbt_indices = NULL;
285 334 table->dbt_id_space = NULL;
286 335 table->dbt_reaper_shutdown = FALSE;
287 336
288 337 if (start >= 0) {
289 338 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
290 339 maxentries = INT32_MAX - start;
291 340 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
292 341 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
293 342 table->dbt_id_space = id_space_create(id_name, start,
294 343 maxentries + start);
295 344 kmem_free(id_name, len + 10);
296 345 }
|
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
297 346 ASSERT(t_lowat != 0);
298 347 table->dbt_id_lwat = (maxentries * t_lowat) / 100;
299 348 ASSERT(t_hiwat != 0);
300 349 table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
301 350 table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
302 351 table->dbt_maxentries = maxentries;
303 352 table->dbt_create = create;
304 353 table->dbt_destroy = destroy;
305 354 table->dbt_expiry = expiry;
306 355
307 - table->dbt_mem_cache = kmem_cache_create(cache_name,
308 - sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
309 - 0,
310 - rfs4_dbe_kmem_constructor,
311 - rfs4_dbe_kmem_destructor,
312 - NULL,
313 - table,
314 - NULL,
315 - 0);
356 + /*
357 + * get the correct kmem_cache for this table type based on the name.
358 + */
359 + table->dbt_mem_cache = get_db_mem_cache(cache_name);
360 +
316 361 kmem_free(cache_name, len+13);
317 362
318 363 table->dbt_debug = db->db_debug_flags;
319 364
320 365 mutex_enter(db->db_lock);
321 366 table->dbt_tnext = db->db_tables;
322 367 db->db_tables = table;
323 368 mutex_exit(db->db_lock);
324 369
325 370 rfs4_start_reaper(table);
326 371
327 372 return (table);
328 373 }
329 374
330 375 void
331 376 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
332 377 {
333 378 rfs4_table_t *p;
334 379 rfs4_index_t *idx;
335 380
336 381 ASSERT(table->dbt_count == 0);
337 382
338 383 mutex_enter(db->db_lock);
339 384 if (table == db->db_tables)
340 385 db->db_tables = table->dbt_tnext;
341 386 else {
342 387 for (p = db->db_tables; p; p = p->dbt_tnext)
343 388 if (p->dbt_tnext == table) {
344 389 p->dbt_tnext = table->dbt_tnext;
345 390 table->dbt_tnext = NULL;
346 391 break;
347 392 }
348 393 ASSERT(p != NULL);
349 394 }
350 395 mutex_exit(db->db_lock);
351 396
352 397 /* Destroy indices */
353 398 while (table->dbt_indices) {
354 399 idx = table->dbt_indices;
355 400 table->dbt_indices = idx->dbi_inext;
356 401 rfs4_index_destroy(idx);
|
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
357 402 }
358 403
359 404 rw_destroy(table->dbt_t_lock);
360 405 mutex_destroy(table->dbt_lock);
361 406 mutex_destroy(&table->dbt_reaper_cv_lock);
362 407 cv_destroy(&table->dbt_reaper_wait);
363 408
364 409 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
365 410 if (table->dbt_id_space)
366 411 id_space_destroy(table->dbt_id_space);
367 - kmem_cache_destroy(table->dbt_mem_cache);
412 + table->dbt_mem_cache = NULL;
368 413 kmem_free(table, sizeof (rfs4_table_t));
369 414 }
370 415
371 416 rfs4_index_t *
372 417 rfs4_index_create(rfs4_table_t *table, char *keyname,
373 418 uint32_t (*hash)(void *),
374 419 bool_t (compare)(rfs4_entry_t, void *),
375 420 void *(*mkkey)(rfs4_entry_t),
376 421 bool_t createable)
377 422 {
378 423 rfs4_index_t *idx;
379 424
380 425 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
381 426
382 427 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
383 428
384 429 idx->dbi_table = table;
385 430 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
386 431 (void) strcpy(idx->dbi_keyname, keyname);
387 432 idx->dbi_hash = hash;
388 433 idx->dbi_compare = compare;
389 434 idx->dbi_mkkey = mkkey;
390 435 idx->dbi_tblidx = table->dbt_idxcnt;
391 436 table->dbt_idxcnt++;
392 437 if (createable) {
393 438 table->dbt_ccnt++;
394 439 if (table->dbt_ccnt > 1)
395 440 panic("Table %s currently can have only have one "
396 441 "index that will allow creation of entries",
397 442 table->dbt_name);
398 443 idx->dbi_createable = TRUE;
399 444 } else {
400 445 idx->dbi_createable = FALSE;
401 446 }
402 447
403 448 idx->dbi_inext = table->dbt_indices;
404 449 table->dbt_indices = idx;
405 450 idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
406 451 KM_SLEEP);
407 452
408 453 return (idx);
409 454 }
410 455
411 456 void
412 457 rfs4_index_destroy(rfs4_index_t *idx)
413 458 {
414 459 kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
415 460 kmem_free(idx->dbi_buckets,
416 461 sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
417 462 kmem_free(idx, sizeof (rfs4_index_t));
418 463 }
419 464
420 465 static void
421 466 rfs4_dbe_destroy(rfs4_dbe_t *entry)
422 467 {
423 468 rfs4_index_t *idx;
424 469 void *key;
425 470 int i;
426 471 rfs4_bucket_t *bp;
427 472 rfs4_table_t *table = entry->dbe_table;
428 473 rfs4_link_t *l;
429 474
430 475 NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
431 476 (CE_NOTE, "Destroying entry %p from %s",
432 477 (void*)entry, table->dbt_name));
433 478
434 479 mutex_enter(entry->dbe_lock);
435 480 ASSERT(entry->dbe_refcnt == 0);
436 481 mutex_exit(entry->dbe_lock);
437 482
438 483 /* Unlink from all indices */
439 484 for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
440 485 l = &entry->dbe_indices[idx->dbi_tblidx];
441 486 /* check and see if we were ever linked in to the index */
442 487 if (INVALID_LINK(l)) {
443 488 ASSERT(l->next == NULL && l->prev == NULL);
444 489 continue;
445 490 }
446 491 key = idx->dbi_mkkey(entry->dbe_data);
447 492 i = HASH(idx, key);
448 493 bp = &idx->dbi_buckets[i];
449 494 ASSERT(bp->dbk_head != NULL);
450 495 DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
451 496 }
452 497
453 498 /* Destroy user data */
454 499 if (table->dbt_destroy)
455 500 (*table->dbt_destroy)(entry->dbe_data);
456 501
457 502 if (table->dbt_id_space)
458 503 id_free(table->dbt_id_space, entry->dbe_id);
459 504
460 505 mutex_enter(table->dbt_lock);
461 506 table->dbt_count--;
462 507 mutex_exit(table->dbt_lock);
463 508
464 509 /* Destroy the entry itself */
465 510 kmem_cache_free(table->dbt_mem_cache, entry);
466 511 }
467 512
468 513
/*
 * Allocate and initialise a new entry for "table".
 *
 *	id	id to record in the entry; only stored when the table has
 *		an id space, otherwise the entry's id is 0
 *	data	opaque argument handed to the table's create callback
 *
 * The entry starts with a reference count of 1 and with every index link
 * marked as not linked.  The user data area sits directly after the
 * dbt_maxcnt index links and is zeroed before the create callback runs.
 *
 * Returns the new entry, or NULL if the create callback fails (the entry
 * is then returned to the kmem cache and the table count is untouched).
 */
static rfs4_dbe_t *
rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
{
	rfs4_dbe_t *entry;
	int i;

	NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
	    (CE_NOTE, "Creating entry in table %s", table->dbt_name));

	entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);

	entry->dbe_refcnt = 1;
	entry->dbe_invalid = FALSE;
	entry->dbe_skipsearch = FALSE;
	entry->dbe_time_rele = 0;
	entry->dbe_id = 0;

	if (table->dbt_id_space)
		entry->dbe_id = id;
	entry->dbe_table = table;

	for (i = 0; i < table->dbt_maxcnt; i++) {
		entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
		entry->dbe_indices[i].entry = entry;
		/*
		 * We mark the entry as not indexed by setting the low
		 * order bit, since addresses are word aligned. This has
		 * the advantage of causing a trap if the address is
		 * used. After the entry is linked in to the
		 * corresponding index the bit will be cleared.
		 */
		INVALIDATE_ADDR(entry->dbe_indices[i].entry);
	}

	/* User data lives immediately after the last index link. */
	entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
	bzero(entry->dbe_data, table->dbt_usize);
	entry->dbe_data->dbe = entry;

	if (!(*table->dbt_create)(entry->dbe_data, data)) {
		kmem_cache_free(table->dbt_mem_cache, entry);
		return (NULL);
	}

	mutex_enter(table->dbt_lock);
	table->dbt_count++;
	mutex_exit(table->dbt_lock);

	return (entry);
}
518 563
519 564 static void
520 565 rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
521 566 {
522 567 clock_t tabreap;
523 568 clock_t reap_int;
524 569 uint32_t in_use;
525 570
526 571 /*
527 572 * Adjust the table's reap interval based on the
528 573 * number of id's currently in use. Each table's
529 574 * default remains the same if id usage subsides.
530 575 */
531 576 ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
532 577 tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
533 578
534 579 in_use = table->dbt_count + 1; /* see rfs4_dbe_create */
535 580 if (in_use >= table->dbt_id_hwat) {
536 581 ASSERT(t_hreap != 0);
537 582 reap_int = (tabreap * t_hreap) / 100;
538 583 } else if (in_use >= table->dbt_id_lwat) {
539 584 ASSERT(t_lreap != 0);
540 585 reap_int = (tabreap * t_lreap) / 100;
541 586 } else {
542 587 reap_int = tabreap;
543 588 }
544 589 table->dbt_id_reap = reap_int;
545 590 DTRACE_PROBE2(table__reap__interval, char *,
546 591 table->dbt_name, time_t, table->dbt_id_reap);
547 592 }
548 593
/*
 * Look up the entry matching "key" through index "idx", optionally
 * creating it when it does not exist.
 *
 *	idx		index to search (and to create through)
 *	key		key passed to the index's hash and compare functions
 *	create		in: whether a missing entry may be created;
 *			out: set to FALSE when an existing entry is found
 *			(left untouched on the other paths)
 *	arg		passed to the table's create callback
 *	dbsearch_type	RFS4_DBS_INVALID additionally matches entries
 *			whose skipsearch flag is set
 *
 * On a hit, an additional hold is placed on the entry and its user data
 * is returned.  On a miss a new entry is created only if *create is set,
 * the table has a create callback, the index is createable and the table
 * is not at dbt_maxentries; otherwise NULL is returned.  NULL is also
 * returned if the create callback fails.  A newly created entry carries
 * two references: the one from rfs4_dbe_create() and one for the hash
 * table.
 */
rfs4_entry_t
rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
    rfs4_dbsearch_type_t dbsearch_type)
{
	int		 already_done;
	uint32_t	 i;
	rfs4_table_t	*table = idx->dbi_table;
	rfs4_index_t	*ip;
	rfs4_bucket_t	*bp;
	rfs4_link_t	*l;
	rfs4_dbe_t	*entry;
	id_t		 id = -1;	/* -1: no id allocated yet */

	i = HASH(idx, key);
	bp = &idx->dbi_buckets[i];

	NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
	    (CE_NOTE, "Searching for key %p in table %s by %s",
	    key, table->dbt_name, idx->dbi_keyname));

	rw_enter(bp->dbk_lock, RW_READER);
retry:
	/*
	 * The bucket lock was dropped on every path that jumps back here,
	 * so the chain is searched again from the head each time.
	 */
	for (l = bp->dbk_head; l; l = l->next) {
		if (l->entry->dbe_refcnt > 0 &&
		    (l->entry->dbe_skipsearch == FALSE ||
		    (l->entry->dbe_skipsearch == TRUE &&
		    dbsearch_type == RFS4_DBS_INVALID)) &&
		    (*idx->dbi_compare)(l->entry->dbe_data, key)) {
			mutex_enter(l->entry->dbe_lock);
			/* Re-check now that the entry lock is held. */
			if (l->entry->dbe_refcnt == 0) {
				mutex_exit(l->entry->dbe_lock);
				continue;
			}

			/* place an additional hold since we are returning */
			rfs4_dbe_hold(l->entry);

			mutex_exit(l->entry->dbe_lock);
			rw_exit(bp->dbk_lock);

			*create = FALSE;

			NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
			    (CE_NOTE, "Found entry %p for %p in table %s",
			    (void *)l->entry, key, table->dbt_name));

			/* Give back an id allocated on an earlier pass. */
			if (id != -1)
				id_free(table->dbt_id_space, id);
			return (l->entry->dbe_data);
		}
	}

	/* Not found: bail out unless creation is requested and possible. */
	if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
	    table->dbt_maxentries == table->dbt_count) {
		NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
		    (CE_NOTE, "Entry for %p in %s not found",
		    key, table->dbt_name));

		rw_exit(bp->dbk_lock);
		if (id != -1)
			id_free(table->dbt_id_space, id);
		return (NULL);
	}

	/*
	 * The table uses ids and none has been allocated yet.  Drop the
	 * bucket lock around the allocation, then retake it as writer and
	 * search again.
	 */
	if (table->dbt_id_space && id == -1) {
		rw_exit(bp->dbk_lock);

		/* get an id, ok to sleep for it here */
		id = id_alloc(table->dbt_id_space);
		ASSERT(id != -1);

		mutex_enter(&table->dbt_reaper_cv_lock);
		rfs4_dbe_tabreap_adjust(table);
		mutex_exit(&table->dbt_reaper_cv_lock);

		rw_enter(bp->dbk_lock, RW_WRITER);
		goto retry;
	}

	/* get an exclusive lock on the bucket */
	if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
		NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
		    (CE_NOTE, "Trying to upgrade lock on "
		    "hash chain %d (%p) for %s by %s",
		    i, (void*)bp, table->dbt_name, idx->dbi_keyname));

		/* Upgrade failed: reacquire as writer and search again. */
		rw_exit(bp->dbk_lock);
		rw_enter(bp->dbk_lock, RW_WRITER);
		goto retry;
	}

	/* create entry */
	entry = rfs4_dbe_create(table, id, arg);
	if (entry == NULL) {
		rw_exit(bp->dbk_lock);
		if (id != -1)
			id_free(table->dbt_id_space, id);

		NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
		    (CE_NOTE, "Constructor for table %s failed",
		    table->dbt_name));
		return (NULL);
	}

	/*
	 * Add one ref for entry into table's hash - only one
	 * reference added even though there may be multiple indices
	 */
	rfs4_dbe_hold(entry);
	ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
	VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);

	already_done = idx->dbi_tblidx;
	rw_exit(bp->dbk_lock);

	/* Link the new entry into every other index of the table. */
	for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
		if (ip->dbi_tblidx == already_done)
			continue;
		l = &entry->dbe_indices[ip->dbi_tblidx];
		i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
		ASSERT(i < ip->dbi_table->dbt_len);
		bp = &ip->dbi_buckets[i];
		ENQUEUE_IDX(bp, l);
	}

	NFS4_DEBUG(
	    table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
	    (CE_NOTE, "Entry %p created for %s = %p in table %s",
	    (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));

	return (entry->dbe_data);
}
681 726
682 727 /*ARGSUSED*/
683 728 boolean_t
684 729 rfs4_cpr_callb(void *arg, int code)
685 730 {
686 - rfs4_table_t *table = rfs4_client_tab;
687 731 rfs4_bucket_t *buckets, *bp;
688 732 rfs4_link_t *l;
689 733 rfs4_client_t *cp;
690 734 int i;
691 735
736 + nfs4_srv_t *nsrv4 = nfs4_get_srv();
737 + rfs4_table_t *table = nsrv4->rfs4_client_tab;
738 +
692 739 /*
693 740 * We get called for Suspend and Resume events.
694 741 * For the suspend case we simply don't care! Nor do we care if
695 742 * there are no clients.
696 743 */
697 744 if (code == CB_CODE_CPR_CHKPT || table == NULL) {
698 745 return (B_TRUE);
699 746 }
700 747
701 748 buckets = table->dbt_indices->dbi_buckets;
702 749
703 750 /*
704 751 * When we get this far we are in the process of
705 752 * resuming the system from a previous suspend.
706 753 *
707 754 * We are going to blast through and update the
708 755 * last_access time for all the clients and in
709 756 * doing so extend them by one lease period.
710 757 */
711 758 for (i = 0; i < table->dbt_len; i++) {
712 759 bp = &buckets[i];
713 760 for (l = bp->dbk_head; l; l = l->next) {
714 761 cp = (rfs4_client_t *)l->entry->dbe_data;
715 762 cp->rc_last_access = gethrestime_sec();
716 763 }
717 764 }
718 765
719 766 return (B_TRUE);
720 767 }
721 768
722 769 /*
723 770 * Given a table, lock each of the buckets and walk all entries (in
724 771 * turn locking those) and calling the provided "callout" function
725 772 * with the provided parameter. Obviously used to iterate across all
726 773 * entries in a particular table via the database locking hierarchy.
727 774 * Obviously the caller must not hold locks on any of the entries in
728 775 * the specified table.
729 776 */
730 777 void
731 778 rfs4_dbe_walk(rfs4_table_t *table,
732 779 void (*callout)(rfs4_entry_t, void *),
733 780 void *data)
734 781 {
735 782 rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
736 783 rfs4_link_t *l;
737 784 rfs4_dbe_t *entry;
738 785 int i;
739 786
740 787 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
741 788 (CE_NOTE, "Walking entries in %s", table->dbt_name));
742 789
743 790 /* Walk the buckets looking for entries to release/destroy */
744 791 for (i = 0; i < table->dbt_len; i++) {
745 792 bp = &buckets[i];
746 793 rw_enter(bp->dbk_lock, RW_READER);
747 794 for (l = bp->dbk_head; l; l = l->next) {
748 795 entry = l->entry;
749 796 mutex_enter(entry->dbe_lock);
750 797 (*callout)(entry->dbe_data, data);
751 798 mutex_exit(entry->dbe_lock);
752 799 }
753 800 rw_exit(bp->dbk_lock);
754 801 }
755 802
756 803 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
757 804 (CE_NOTE, "Walking entries complete %s", table->dbt_name));
758 805 }
759 806
760 807
/*
 * Reap entries from "table" by walking the buckets of the first index on
 * the table's index list.
 *
 *	cache_time	only reported in the debug messages here
 *	desired		stop after at least this many entries have been
 *			reaped; 0 means no limit.  Ignored while the table
 *			is shutting down.
 *
 * Each bucket is handled in two passes.  Under the reader lock, every
 * entry whose only reference is the hash table's (refcnt == 1) and which
 * is reapable has that reference dropped.  An entry is reapable when the
 * table is shutting down, when the table has no expiry callback, or when
 * the expiry callback says so.  If anything was released, the bucket lock
 * is taken as writer and all entries with a zero reference count are
 * unlinked and destroyed.
 *
 * While the table is being shut down, each bucket is retried (with a
 * short delay) until it is empty.
 */
static void
rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
{
	rfs4_index_t *idx = table->dbt_indices;
	rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
	rfs4_link_t *l, *t;
	rfs4_dbe_t *entry;
	bool_t found;
	int i;
	int count = 0;

	NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
	    (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
	    desired, cache_time, table->dbt_name));

	/* Walk the buckets looking for entries to release/destroy */
	for (i = 0; i < table->dbt_len; i++) {
		bp = &buckets[i];
		do {
			found = FALSE;
			rw_enter(bp->dbk_lock, RW_READER);
			for (l = bp->dbk_head; l; l = l->next) {
				entry = l->entry;
				/*
				 * Examine an entry.  Ref count of 1 means
				 * that the only reference is for the hash
				 * table reference.
				 */
				if (entry->dbe_refcnt != 1)
					continue;
				mutex_enter(entry->dbe_lock);
				/* Re-check the count under the entry lock. */
				if ((entry->dbe_refcnt == 1) &&
				    (table->dbt_reaper_shutdown ||
				    table->dbt_expiry == NULL ||
				    (*table->dbt_expiry)(entry->dbe_data))) {
					entry->dbe_refcnt--;
					count++;
					found = TRUE;
				}
				mutex_exit(entry->dbe_lock);
			}
			if (found) {
				/* Need the writer lock to unlink entries. */
				if (!rw_tryupgrade(bp->dbk_lock)) {
					rw_exit(bp->dbk_lock);
					rw_enter(bp->dbk_lock, RW_WRITER);
				}

				l = bp->dbk_head;
				while (l) {
					t = l;
					entry = t->entry;
					/* Advance before t is unlinked. */
					l = l->next;
					if (entry->dbe_refcnt == 0) {
						DEQUEUE(bp->dbk_head, t);
						t->next = NULL;
						t->prev = NULL;
						INVALIDATE_ADDR(t->entry);
						rfs4_dbe_destroy(entry);
					}
				}
			}
			rw_exit(bp->dbk_lock);
			/*
			 * delay slightly if there is more work to do
			 * with the expectation that other reaper
			 * threads are freeing data structures as well
			 * and in turn will reduce ref counts on
			 * entries in this table allowing them to be
			 * released.  This is only done in the
			 * instance that the tables are being shut down.
			 */
			if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
				delay(hz/100);
		/*
		 * If this is a table shutdown, keep going until
		 * everything is gone
		 */
		} while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);

		/* Outside of shutdown, stop once enough have been reaped. */
		if (!table->dbt_reaper_shutdown && desired && count >= desired)
			break;
	}

	NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
	    (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
	    count, cache_time, table->dbt_name));
}
848 895
/*
 * Body of a table's reaper thread; "arg" is the rfs4_table_t to reap.
 *
 * The thread repeatedly waits on dbt_reaper_wait for up to dbt_id_reap
 * seconds and then calls rfs4_dbe_reap() on the table.  The loop ends
 * once the table has been flagged for shutdown or the wait returns 0.
 * Before exiting, the thread decrements the database's shutdown count
 * and signals db_shutdown_wait so that rfs4_database_shutdown() can make
 * progress.
 *
 * NOTE(review): the count is decremented even when the loop ends because
 * the wait returned 0 without a shutdown having been requested - confirm
 * that this cannot unbalance db_shutdown_count.
 */
static void
reaper_thread(caddr_t *arg)
{
	rfs4_table_t	*table = (rfs4_table_t *)arg;
	clock_t		 rc;

	NFS4_DEBUG(table->dbt_debug,
	    (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));

	CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
	    callb_generic_cpr, "nfsv4Reaper");

	mutex_enter(&table->dbt_reaper_cv_lock);
	do {
		/* The wait is bracketed as CPR-safe. */
		CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
		rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
		    &table->dbt_reaper_cv_lock,
		    SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
		CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
		    &table->dbt_reaper_cv_lock);
		/* A "desired" of 0 places no limit on the entries reaped. */
		rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
	} while (rc != 0 && table->dbt_reaper_shutdown == FALSE);

	/*
	 * NOTE(review): no explicit mutex_exit() of dbt_reaper_cv_lock
	 * follows; presumably CALLB_CPR_EXIT drops it - confirm.
	 */
	CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);

	NFS4_DEBUG(table->dbt_debug,
	    (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));

	/* Notify the database shutdown processing that the table is shutdown */
	mutex_enter(table->dbt_db->db_lock);
	table->dbt_db->db_shutdown_count--;
	cv_signal(&table->dbt_db->db_shutdown_wait);
	mutex_exit(table->dbt_db->db_lock);
	zthread_exit();
}
883 931
884 932 static void
885 933 rfs4_start_reaper(rfs4_table_t *table)
886 934 {
887 935 if (table->dbt_max_cache_time == 0)
888 936 return;
889 937
890 - (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
938 + (void) zthread_create(NULL, 0, reaper_thread, table, 0,
891 939 minclsyspri);
892 940 }
893 941
#ifdef DEBUG
/*
 * Debug aid: log the entry's address, owning table name, reference count
 * and id.  Only built into DEBUG kernels.
 */
void
rfs4_dbe_debug(rfs4_dbe_t *entry)
{
	cmn_err(CE_NOTE, "Entry %p from table %s",
	    (void *)entry, entry->dbe_table->dbt_name);
	/* dbe_refcnt is a 32-bit unsigned counter, so print it with %u. */
	cmn_err(CE_CONT, "\trefcnt = %u id = %d",
	    entry->dbe_refcnt, entry->dbe_id);
}
#endif
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX