1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/systm.h>
  26 #include <sys/cmn_err.h>
  27 #include <sys/kmem.h>
  28 #include <sys/disp.h>
  29 #include <sys/id_space.h>
  30 #include <sys/atomic.h>
  31 #include <rpc/rpc.h>
  32 #include <nfs/nfs4.h>
  33 #include <nfs/nfs4_db_impl.h>
  34 #include <sys/sdt.h>
  35 
  36 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
  37 
  38 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
  39 static void rfs4_dbe_destroy(rfs4_dbe_t *);
  40 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
  41 static void rfs4_start_reaper(rfs4_table_t *);
  42 
  43 /*
      * Global tunables for the table reaper.
      *
  44  * t_lowat - integer percentage of table entries        /etc/system only
  45  * t_hiwat - integer percentage of table entries        /etc/system only
  46  * t_lreap - integer percentage of table reap time      mdb or /etc/system
  47  * t_hreap - integer percentage of table reap time      mdb or /etc/system
      *
      * rfs4_table_create() asserts that t_lowat and t_hiwat are non-zero and
      * scales them by the table's maxentries to compute the per-table
      * dbt_id_lwat and dbt_id_hwat watermarks.
  48  */
  49 uint32_t        t_lowat = 50;   /* reap at t_lreap when id's in use hit 50% */
  50 uint32_t        t_hiwat = 75;   /* reap at t_hreap when id's in use hit 75% */
  51 time_t          t_lreap = 50;   /* default to 50% of table's reap interval */
  52 time_t          t_hreap = 10;   /* default to 10% of table's reap interval */
  53 
  54 id_t
  55 rfs4_dbe_getid(rfs4_dbe_t *entry)
  56 {
  57         return (entry->dbe_id);
  58 }
  59 
  60 void
  61 rfs4_dbe_hold(rfs4_dbe_t *entry)
  62 {
  63         atomic_inc_32(&entry->dbe_refcnt);
  64 }
  65 
  66 /*
  67  * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
  68  */
  69 void
  70 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  71 {
  72         atomic_dec_32(&entry->dbe_refcnt);
  73 }
  74 
  75 
  76 uint32_t
  77 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
  78 {
  79         return (entry->dbe_refcnt);
  80 }
  81 
  82 /*
  83  * Mark an entry such that the dbsearch will skip it.
  84  * Caller does not want this entry to be found any longer
  85  */
  86 void
  87 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
  88 {
  89         entry->dbe_invalid = TRUE;
  90         entry->dbe_skipsearch = TRUE;
  91 }
  92 
  93 /*
  94  * Is this entry invalid?
  95  */
  96 bool_t
  97 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
  98 {
  99         return (entry->dbe_invalid);
 100 }
 101 
 102 time_t
 103 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
 104 {
 105         return (entry->dbe_time_rele);
 106 }
 107 
 108 /*
 109  * Use these to temporarily hide/unhide a db entry.
 110  */
 
 112 rfs4_dbe_hide(rfs4_dbe_t *entry)
 113 {
 114         rfs4_dbe_lock(entry);
 115         entry->dbe_skipsearch = TRUE;
 116         rfs4_dbe_unlock(entry);
 117 }
 118 
 119 void
 120 rfs4_dbe_unhide(rfs4_dbe_t *entry)
 121 {
 122         rfs4_dbe_lock(entry);
 123         entry->dbe_skipsearch = FALSE;
 124         rfs4_dbe_unlock(entry);
 125 }
 126 
 127 void
 128 rfs4_dbe_rele(rfs4_dbe_t *entry)
 129 {
 130         mutex_enter(entry->dbe_lock);
 131         ASSERT(entry->dbe_refcnt > 1);
 132         atomic_dec_32(&entry->dbe_refcnt);
 133         entry->dbe_time_rele = gethrestime_sec();
 134         mutex_exit(entry->dbe_lock);
 135 }
 136 
 137 void
 138 rfs4_dbe_lock(rfs4_dbe_t *entry)
 139 {
 140         mutex_enter(entry->dbe_lock);
 141 }
 142 
 143 void
 144 rfs4_dbe_unlock(rfs4_dbe_t *entry)
 145 {
 146         mutex_exit(entry->dbe_lock);
 147 }
 148 
 149 bool_t
 150 rfs4_dbe_islocked(rfs4_dbe_t *entry)
 151 {
 152         return (mutex_owned(entry->dbe_lock));
 
 232 /*
 233  * Given a database that has been "shutdown" by the function above all
 234  * of the table tables are destroyed and then the database itself
 235  * freed.
 236  */
 237 void
 238 rfs4_database_destroy(rfs4_database_t *db)
 239 {
 240         rfs4_table_t *next, *tmp;
 241 
 242         for (next = db->db_tables; next; ) {
 243                 tmp = next;
 244                 next = tmp->dbt_tnext;
 245                 rfs4_table_destroy(db, tmp);
 246         }
 247 
 248         mutex_destroy(db->db_lock);
 249         kmem_free(db, sizeof (rfs4_database_t));
 250 }
 251 
 252 rfs4_table_t *
 253 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
 254     uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
 255     void (*destroy)(rfs4_entry_t),
 256     bool_t (*expiry)(rfs4_entry_t),
 257     uint32_t size, uint32_t hashsize,
 258     uint32_t maxentries, id_t start)
 259 {
 260         rfs4_table_t    *table;
 261         int              len;
 262         char            *cache_name;
 263         char            *id_name;
 264 
 265         table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
 266         table->dbt_db = db;
 267         rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
 268         mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
 269         mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
 270         cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
 271 
 
 287 
 288         if (start >= 0) {
 289                 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
 290                         maxentries = INT32_MAX - start;
 291                 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
 292                 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
 293                 table->dbt_id_space = id_space_create(id_name, start,
 294                     maxentries + start);
 295                 kmem_free(id_name, len + 10);
 296         }
 297         ASSERT(t_lowat != 0);
 298         table->dbt_id_lwat = (maxentries * t_lowat) / 100;
 299         ASSERT(t_hiwat != 0);
 300         table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
 301         table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
 302         table->dbt_maxentries = maxentries;
 303         table->dbt_create = create;
 304         table->dbt_destroy = destroy;
 305         table->dbt_expiry = expiry;
 306 
 307         table->dbt_mem_cache = kmem_cache_create(cache_name,
 308             sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
 309             0,
 310             rfs4_dbe_kmem_constructor,
 311             rfs4_dbe_kmem_destructor,
 312             NULL,
 313             table,
 314             NULL,
 315             0);
 316         kmem_free(cache_name, len+13);
 317 
 318         table->dbt_debug = db->db_debug_flags;
 319 
 320         mutex_enter(db->db_lock);
 321         table->dbt_tnext = db->db_tables;
 322         db->db_tables = table;
 323         mutex_exit(db->db_lock);
 324 
 325         rfs4_start_reaper(table);
 326 
 327         return (table);
 328 }
 329 
 330 void
 331 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
 332 {
 333         rfs4_table_t *p;
 334         rfs4_index_t *idx;
 335 
 
 347                         }
 348                 ASSERT(p != NULL);
 349         }
 350         mutex_exit(db->db_lock);
 351 
 352         /* Destroy indices */
 353         while (table->dbt_indices) {
 354                 idx = table->dbt_indices;
 355                 table->dbt_indices = idx->dbi_inext;
 356                 rfs4_index_destroy(idx);
 357         }
 358 
 359         rw_destroy(table->dbt_t_lock);
 360         mutex_destroy(table->dbt_lock);
 361         mutex_destroy(&table->dbt_reaper_cv_lock);
 362         cv_destroy(&table->dbt_reaper_wait);
 363 
 364         kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
 365         if (table->dbt_id_space)
 366                 id_space_destroy(table->dbt_id_space);
 367         kmem_cache_destroy(table->dbt_mem_cache);
 368         kmem_free(table, sizeof (rfs4_table_t));
 369 }
 370 
 371 rfs4_index_t *
 372 rfs4_index_create(rfs4_table_t *table, char *keyname,
 373     uint32_t (*hash)(void *),
 374     bool_t (compare)(rfs4_entry_t, void *),
 375     void *(*mkkey)(rfs4_entry_t),
 376     bool_t createable)
 377 {
 378         rfs4_index_t *idx;
 379 
 380         ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
 381 
 382         idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
 383 
 384         idx->dbi_table = table;
 385         idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
 386         (void) strcpy(idx->dbi_keyname, keyname);
 387         idx->dbi_hash = hash;
 
 666                         continue;
 667                 l = &entry->dbe_indices[ip->dbi_tblidx];
 668                 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
 669                 ASSERT(i < ip->dbi_table->dbt_len);
 670                 bp = &ip->dbi_buckets[i];
 671                 ENQUEUE_IDX(bp, l);
 672         }
 673 
 674         NFS4_DEBUG(
 675             table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
 676             (CE_NOTE, "Entry %p created for %s = %p in table %s",
 677             (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
 678 
 679         return (entry->dbe_data);
 680 }
 681 
 682 /*ARGSUSED*/
 683 boolean_t
 684 rfs4_cpr_callb(void *arg, int code)
 685 {
 686         rfs4_table_t *table = rfs4_client_tab;
 687         rfs4_bucket_t *buckets, *bp;
 688         rfs4_link_t *l;
 689         rfs4_client_t *cp;
 690         int i;
 691 
 692         /*
 693          * We get called for Suspend and Resume events.
 694          * For the suspend case we simply don't care!  Nor do we care if
 695          * there are no clients.
 696          */
 697         if (code == CB_CODE_CPR_CHKPT || table == NULL) {
 698                 return (B_TRUE);
 699         }
 700 
 701         buckets = table->dbt_indices->dbi_buckets;
 702 
 703         /*
 704          * When we get this far we are in the process of
 705          * resuming the system from a previous suspend.
 706          *
 707          * We are going to blast through and update the
 708          * last_access time for all the clients and in
 709          * doing so extend them by one lease period.
 710          */
 711         for (i = 0; i < table->dbt_len; i++) {
 
 758 }
 759 
 760 
 761 static void
 762 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
 763 {
 764         rfs4_index_t *idx = table->dbt_indices;
 765         rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
 766         rfs4_link_t *l, *t;
 767         rfs4_dbe_t *entry;
 768         bool_t found;
 769         int i;
 770         int count = 0;
 771 
 772         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 773             (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
 774             desired, cache_time, table->dbt_name));
 775 
 776         /* Walk the buckets looking for entries to release/destroy */
 777         for (i = 0; i < table->dbt_len; i++) {
 778                 bp = &buckets[i];
 779                 do {
 780                         found = FALSE;
 781                         rw_enter(bp->dbk_lock, RW_READER);
 782                         for (l = bp->dbk_head; l; l = l->next) {
 783                                 entry = l->entry;
 784                                 /*
 785                                  * Examine an entry.  Ref count of 1 means
 786                                  * that the only reference is for the hash
 787                                  * table reference.
 788                                  */
 789                                 if (entry->dbe_refcnt != 1)
 790                                         continue;
 791                                 mutex_enter(entry->dbe_lock);
 792                                 if ((entry->dbe_refcnt == 1) &&
 793                                     (table->dbt_reaper_shutdown ||
 794                                     table->dbt_expiry == NULL ||
 795                                     (*table->dbt_expiry)(entry->dbe_data))) {
 796                                         entry->dbe_refcnt--;
 797                                         count++;
 798                                         found = TRUE;
 799                                 }
 800                                 mutex_exit(entry->dbe_lock);
 801                         }
 802                         if (found) {
 803                                 if (!rw_tryupgrade(bp->dbk_lock)) {
 804                                         rw_exit(bp->dbk_lock);
 805                                         rw_enter(bp->dbk_lock, RW_WRITER);
 806                                 }
 807 
 808                                 l = bp->dbk_head;
 809                                 while (l) {
 810                                         t = l;
 811                                         entry = t->entry;
 812                                         l = l->next;
 813                                         if (entry->dbe_refcnt == 0) {
 814                                                 DEQUEUE(bp->dbk_head, t);
 815                                                 t->next = NULL;
 816                                                 t->prev = NULL;
 817                                                 INVALIDATE_ADDR(t->entry);
 818                                                 rfs4_dbe_destroy(entry);
 819                                         }
 820                                 }
 821                         }
 822                         rw_exit(bp->dbk_lock);
 823                         /*
 824                          * delay slightly if there is more work to do
 825                          * with the expectation that other reaper
 826                          * threads are freeing data structures as well
 827                          * and in turn will reduce ref counts on
 828                          * entries in this table allowing them to be
 829                          * released.  This is only done in the
 830                          * instance that the tables are being shut down.
 831                          */
 832                         if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
 833                                 delay(hz/100);
 834                 /*
 835                  * If this is a table shutdown, keep going until
 836                  * everything is gone
 837                  */
 838                 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
 839 
 840                 if (!table->dbt_reaper_shutdown && desired && count >= desired)
 841                         break;
 842         }
 843 
 844         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 845             (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
 846             count, cache_time, table->dbt_name));
 847 }
 848 
 849 static void
 850 reaper_thread(caddr_t *arg)
 851 {
 852         rfs4_table_t    *table = (rfs4_table_t *)arg;
 853         clock_t          rc;
 854 
 855         NFS4_DEBUG(table->dbt_debug,
 856             (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
 857 
 858         CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
 
 862         do {
 863                 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
 864                 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
 865                     &table->dbt_reaper_cv_lock,
 866                     SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
 867                 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
 868                     &table->dbt_reaper_cv_lock);
 869                 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
 870         } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
 871 
 872         CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
 873 
 874         NFS4_DEBUG(table->dbt_debug,
 875             (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
 876 
 877         /* Notify the database shutdown processing that the table is shutdown */
 878         mutex_enter(table->dbt_db->db_lock);
 879         table->dbt_db->db_shutdown_count--;
 880         cv_signal(&table->dbt_db->db_shutdown_wait);
 881         mutex_exit(table->dbt_db->db_lock);
 882 }
 883 
 884 static void
 885 rfs4_start_reaper(rfs4_table_t *table)
 886 {
 887         if (table->dbt_max_cache_time == 0)
 888                 return;
 889 
 890         (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
 891             minclsyspri);
 892 }
 893 
 894 #ifdef DEBUG
 895 void
 896 rfs4_dbe_debug(rfs4_dbe_t *entry)
 897 {
 898         cmn_err(CE_NOTE, "Entry %p from table %s",
 899             (void *)entry, entry->dbe_table->dbt_name);
 900         cmn_err(CE_CONT, "\trefcnt = %d id = %d",
 901             entry->dbe_refcnt, entry->dbe_id);
 902 }
 903 #endif
 | 
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2019 Nexenta Systems, Inc.
  28  */
  29 
  30 #include <sys/systm.h>
  31 #include <sys/cmn_err.h>
  32 #include <sys/kmem.h>
  33 #include <sys/disp.h>
  34 #include <sys/id_space.h>
  35 #include <rpc/rpc.h>
  36 #include <nfs/nfs4.h>
  37 #include <nfs/nfs4_db_impl.h>
  38 #include <sys/sdt.h>
  39 
  40 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
  41 
  42 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
  43 static void rfs4_dbe_destroy(rfs4_dbe_t *);
  44 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
  45 static void rfs4_start_reaper(rfs4_table_t *);
  46 
  47 /*
      * Global tunables for the table reaper.
      *
  48  * t_lowat - integer percentage of table entries        /etc/system only
  49  * t_hiwat - integer percentage of table entries        /etc/system only
  50  * t_lreap - integer percentage of table reap time      mdb or /etc/system
  51  * t_hreap - integer percentage of table reap time      mdb or /etc/system
      *
      * rfs4_table_create() asserts that t_lowat and t_hiwat are non-zero and
      * scales them by the table's maxentries to compute the per-table
      * dbt_id_lwat and dbt_id_hwat watermarks.
  52  */
  53 uint32_t        t_lowat = 50;   /* reap at t_lreap when id's in use hit 50% */
  54 uint32_t        t_hiwat = 75;   /* reap at t_hreap when id's in use hit 75% */
  55 time_t          t_lreap = 50;   /* default to 50% of table's reap interval */
  56 time_t          t_hreap = 10;   /* default to 10% of table's reap interval */
  57 
  58 id_t
  59 rfs4_dbe_getid(rfs4_dbe_t *entry)
  60 {
  61         return (entry->dbe_id);
  62 }
  63 
  64 void
  65 rfs4_dbe_hold(rfs4_dbe_t *entry)
  66 {
  67         if (!MUTEX_HELD(entry->dbe_lock)) {
  68                 mutex_enter(entry->dbe_lock);
  69                 entry->dbe_refcnt++;
  70                 mutex_exit(entry->dbe_lock);
  71         } else {
  72                 entry->dbe_refcnt++;
  73         }
  74 }
  75 
  76 /*
  77  * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
  78  */
  79 void
  80 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  81 {
  82         if (!MUTEX_HELD(entry->dbe_lock)) {
  83                 ASSERT(entry->dbe_refcnt > 0);
  84                 mutex_enter(entry->dbe_lock);
  85                 entry->dbe_refcnt--;
  86                 mutex_exit(entry->dbe_lock);
  87         } else {
  88                 entry->dbe_refcnt--;
  89         }
  90 }
  91 
  92 
  93 uint32_t
  94 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
  95 {
  96         return (entry->dbe_refcnt);
  97 }
  98 
  99 /*
 100  * Mark an entry such that the dbsearch will skip it.
 101  * Caller does not want this entry to be found any longer
 102  */
 103 void
 104 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
 105 {
 106         if (!MUTEX_HELD(entry->dbe_lock)) {
 107                 mutex_enter(entry->dbe_lock);
 108                 entry->dbe_invalid = TRUE;
 109                 entry->dbe_skipsearch = TRUE;
 110                 mutex_exit(entry->dbe_lock);
 111         } else {
 112                 entry->dbe_invalid = TRUE;
 113                 entry->dbe_skipsearch = TRUE;
 114         }
 115 }
 116 
 117 /*
 118  * Is this entry invalid?
 119  */
 120 bool_t
 121 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
 122 {
 123         return (entry->dbe_invalid);
 124 }
 125 
 126 time_t
 127 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
 128 {
 129         return (entry->dbe_time_rele);
 130 }
 131 
 132 /*
 133  * Use these to temporarily hide/unhide a db entry.
 134  */
 
 136 rfs4_dbe_hide(rfs4_dbe_t *entry)
 137 {
 138         rfs4_dbe_lock(entry);
 139         entry->dbe_skipsearch = TRUE;
 140         rfs4_dbe_unlock(entry);
 141 }
 142 
 143 void
 144 rfs4_dbe_unhide(rfs4_dbe_t *entry)
 145 {
 146         rfs4_dbe_lock(entry);
 147         entry->dbe_skipsearch = FALSE;
 148         rfs4_dbe_unlock(entry);
 149 }
 150 
 151 void
 152 rfs4_dbe_rele(rfs4_dbe_t *entry)
 153 {
 154         mutex_enter(entry->dbe_lock);
 155         ASSERT(entry->dbe_refcnt > 1);
 156         entry->dbe_refcnt--;
 157         entry->dbe_time_rele = gethrestime_sec();
 158         mutex_exit(entry->dbe_lock);
 159 }
 160 
 161 void
 162 rfs4_dbe_lock(rfs4_dbe_t *entry)
 163 {
 164         mutex_enter(entry->dbe_lock);
 165 }
 166 
 167 void
 168 rfs4_dbe_unlock(rfs4_dbe_t *entry)
 169 {
 170         mutex_exit(entry->dbe_lock);
 171 }
 172 
 173 bool_t
 174 rfs4_dbe_islocked(rfs4_dbe_t *entry)
 175 {
 176         return (mutex_owned(entry->dbe_lock));
 
 256 /*
 257  * Given a database that has been "shutdown" by the function above all
 258  * of the table tables are destroyed and then the database itself
 259  * freed.
 260  */
 261 void
 262 rfs4_database_destroy(rfs4_database_t *db)
 263 {
 264         rfs4_table_t *next, *tmp;
 265 
 266         for (next = db->db_tables; next; ) {
 267                 tmp = next;
 268                 next = tmp->dbt_tnext;
 269                 rfs4_table_destroy(db, tmp);
 270         }
 271 
 272         mutex_destroy(db->db_lock);
 273         kmem_free(db, sizeof (rfs4_database_t));
 274 }
 275 
 276 /*
 277  * Used to get the correct kmem_cache database for the state table being
 278  * created.
 279  * Helper function for rfs4_table_create
 280  */
 281 static kmem_cache_t *
 282 get_db_mem_cache(char *name)
 283 {
 284         int i;
 285 
 286         for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
 287                 if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
 288                         return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
 289         }
 290         /*
 291          * There is no associated kmem cache for this NFS4 server state
 292          * table name
 293          */
 294         return (NULL);
 295 }
 296 
 297 /*
 298  * Used to initialize the global NFSv4 server state database.
 299  * Helper funtion for rfs4_state_g_init and called when module is loaded.
 300  */
 301 kmem_cache_t *
 302 /* CSTYLED */
 303 nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
 304 {
 305         kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
 306             sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
 307             0,
 308             rfs4_dbe_kmem_constructor,
 309             rfs4_dbe_kmem_destructor,
 310             NULL,
 311             NULL,
 312             NULL,
 313             0);
 314         (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
 315             strlen(cache_name) + 1);
 316         rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
 317         return (mem_cache);
 318 }
 319 
 320 rfs4_table_t *
 321 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
 322     uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
 323     void (*destroy)(rfs4_entry_t),
 324     bool_t (*expiry)(rfs4_entry_t),
 325     uint32_t size, uint32_t hashsize,
 326     uint32_t maxentries, id_t start)
 327 {
 328         rfs4_table_t    *table;
 329         int              len;
 330         char            *cache_name;
 331         char            *id_name;
 332 
 333         table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
 334         table->dbt_db = db;
 335         rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
 336         mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
 337         mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
 338         cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
 339 
 
 355 
 356         if (start >= 0) {
 357                 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
 358                         maxentries = INT32_MAX - start;
 359                 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
 360                 (void) sprintf(id_name, "%s_id_space", table->dbt_name);
 361                 table->dbt_id_space = id_space_create(id_name, start,
 362                     maxentries + start);
 363                 kmem_free(id_name, len + 10);
 364         }
 365         ASSERT(t_lowat != 0);
 366         table->dbt_id_lwat = (maxentries * t_lowat) / 100;
 367         ASSERT(t_hiwat != 0);
 368         table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
 369         table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
 370         table->dbt_maxentries = maxentries;
 371         table->dbt_create = create;
 372         table->dbt_destroy = destroy;
 373         table->dbt_expiry = expiry;
 374 
 375         /*
 376          * get the correct kmem_cache for this table type based on the name.
 377          */
 378         table->dbt_mem_cache = get_db_mem_cache(cache_name);
 379 
 380         kmem_free(cache_name, len+13);
 381 
 382         table->dbt_debug = db->db_debug_flags;
 383 
 384         mutex_enter(db->db_lock);
 385         table->dbt_tnext = db->db_tables;
 386         db->db_tables = table;
 387         mutex_exit(db->db_lock);
 388 
 389         rfs4_start_reaper(table);
 390 
 391         return (table);
 392 }
 393 
 394 void
 395 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
 396 {
 397         rfs4_table_t *p;
 398         rfs4_index_t *idx;
 399 
 
 411                         }
 412                 ASSERT(p != NULL);
 413         }
 414         mutex_exit(db->db_lock);
 415 
 416         /* Destroy indices */
 417         while (table->dbt_indices) {
 418                 idx = table->dbt_indices;
 419                 table->dbt_indices = idx->dbi_inext;
 420                 rfs4_index_destroy(idx);
 421         }
 422 
 423         rw_destroy(table->dbt_t_lock);
 424         mutex_destroy(table->dbt_lock);
 425         mutex_destroy(&table->dbt_reaper_cv_lock);
 426         cv_destroy(&table->dbt_reaper_wait);
 427 
 428         kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
 429         if (table->dbt_id_space)
 430                 id_space_destroy(table->dbt_id_space);
 431         table->dbt_mem_cache = NULL;
 432         kmem_free(table, sizeof (rfs4_table_t));
 433 }
 434 
 435 rfs4_index_t *
 436 rfs4_index_create(rfs4_table_t *table, char *keyname,
 437     uint32_t (*hash)(void *),
 438     bool_t (compare)(rfs4_entry_t, void *),
 439     void *(*mkkey)(rfs4_entry_t),
 440     bool_t createable)
 441 {
 442         rfs4_index_t *idx;
 443 
 444         ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
 445 
 446         idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
 447 
 448         idx->dbi_table = table;
 449         idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
 450         (void) strcpy(idx->dbi_keyname, keyname);
 451         idx->dbi_hash = hash;
 
 730                         continue;
 731                 l = &entry->dbe_indices[ip->dbi_tblidx];
 732                 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
 733                 ASSERT(i < ip->dbi_table->dbt_len);
 734                 bp = &ip->dbi_buckets[i];
 735                 ENQUEUE_IDX(bp, l);
 736         }
 737 
 738         NFS4_DEBUG(
 739             table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
 740             (CE_NOTE, "Entry %p created for %s = %p in table %s",
 741             (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
 742 
 743         return (entry->dbe_data);
 744 }
 745 
 746 /*ARGSUSED*/
 747 boolean_t
 748 rfs4_cpr_callb(void *arg, int code)
 749 {
 750         rfs4_bucket_t *buckets, *bp;
 751         rfs4_link_t *l;
 752         rfs4_client_t *cp;
 753         int i;
 754 
 755         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
 756         rfs4_table_t *table = nsrv4->rfs4_client_tab;
 757 
 758         /*
 759          * We get called for Suspend and Resume events.
 760          * For the suspend case we simply don't care!  Nor do we care if
 761          * there are no clients.
 762          */
 763         if (code == CB_CODE_CPR_CHKPT || table == NULL) {
 764                 return (B_TRUE);
 765         }
 766 
 767         buckets = table->dbt_indices->dbi_buckets;
 768 
 769         /*
 770          * When we get this far we are in the process of
 771          * resuming the system from a previous suspend.
 772          *
 773          * We are going to blast through and update the
 774          * last_access time for all the clients and in
 775          * doing so extend them by one lease period.
 776          */
 777         for (i = 0; i < table->dbt_len; i++) {
 
 824 }
 825 
 826
     /*
      * Reap idle entries from every bucket of `table`.
      *
      * An entry is released when its reference count is exactly 1 (per the
      * comment in the loop below, that is the hash table's own reference)
      * and one of the following holds: the table is shutting down
      * (dbt_reaper_shutdown), the table has no dbt_expiry callback, or the
      * callback returns non-zero for the entry's data.
      *
      * Each bucket is handled in two passes:
      *   1. With the bucket lock held as reader, eligible entries are passed
      *      to rfs4_dbe_rele_nolock() while their own dbe_lock is held.
      *   2. If anything was released, the bucket lock is taken as writer and
      *      every entry whose reference count is now 0 is unlinked from the
      *      bucket and handed to rfs4_dbe_destroy().
      *
      * table      - table to walk; dbt_len buckets are visited.
      * cache_time - only reported in the debug messages by this function.
      * desired    - if non-zero, stop walking once at least this many
      *              entries have been released; this is checked after each
      *              bucket and ignored while the table is shutting down.
      *
      * During shutdown a bucket that is still non-empty is retried after a
      * short delay, for at most 5 passes per bucket.
      */
 827 static void
 828 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
 829 {
 830         rfs4_index_t *idx = table->dbt_indices;
 831         rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
 832         rfs4_link_t *l, *t;
 833         rfs4_dbe_t *entry;
 834         bool_t found;
 835         int i;
 836         int count = 0;  /* entries released so far, across all buckets */
 837
 838         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 839             (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
 840             desired, cache_time, table->dbt_name));
 841
 842         /* Walk the buckets looking for entries to release/destroy */
 843         for (i = 0; i < table->dbt_len; i++) {
 844                 int retries = 0;
 845                 bp = &buckets[i];
 846                 do {
 847                         found = FALSE;
                             /* Pass 1: release eligible entries as a reader. */
 848                         rw_enter(bp->dbk_lock, RW_READER);
 849                         for (l = bp->dbk_head; l; l = l->next) {
 850                                 entry = l->entry;
 851                                 mutex_enter(entry->dbe_lock);
 852                                 ASSERT(entry->dbe_refcnt != 0);
 853                                 /*
 854                                  * Examine an entry.  Ref count of 1 means
 855                                  * that the only reference is for the hash
 856                                  * table reference.
 857                                  */
 858                                 if (entry->dbe_refcnt != 1) {
 859 #ifdef DEBUG
 860                                         rfs4_dbe_debug(entry);
 861 #endif
 862                                         mutex_exit(entry->dbe_lock);
 863                                         continue;
 864                                 }
                                     /*
                                      * dbe_refcnt is known to be 1 here (it was
                                      * checked above with dbe_lock held), so only
                                      * the shutdown/expiry test decides whether
                                      * the entry is released.
                                      */
 865                                 if ((entry->dbe_refcnt == 1) &&
 866                                     (table->dbt_reaper_shutdown ||
 867                                     table->dbt_expiry == NULL ||
 868                                     (*table->dbt_expiry)(entry->dbe_data))) {
 869                                         rfs4_dbe_rele_nolock(entry);
 870                                         count++;
 871                                         found = TRUE;
 872                                 }
 873                                 mutex_exit(entry->dbe_lock);
 874                         }
 875                         if (found) {
                                     /*
                                      * Pass 2: become writer before unlinking.
                                      * If the in-place upgrade fails the lock is
                                      * dropped and retaken, so the list is
                                      * rescanned from the head and each ref
                                      * count is rechecked under dbe_lock.
                                      */
 876                                 if (!rw_tryupgrade(bp->dbk_lock)) {
 877                                         rw_exit(bp->dbk_lock);
 878                                         rw_enter(bp->dbk_lock, RW_WRITER);
 879                                 }
 880
 881                                 l = bp->dbk_head;
 882                                 while (l) {
 883                                         t = l;
 884                                         entry = t->entry;
                                             /* Advance first; t may be unlinked. */
 885                                         l = l->next;
 886                                         mutex_enter(entry->dbe_lock);
 887                                         if (entry->dbe_refcnt == 0) {
 888                                                 DEQUEUE(bp->dbk_head, t);
                                                     /*
                                                      * Drop the entry lock before
                                                      * the entry is destroyed.
                                                      */
 889                                                 mutex_exit(entry->dbe_lock);
 890                                                 t->next = NULL;
 891                                                 t->prev = NULL;
 892                                                 INVALIDATE_ADDR(t->entry);
 893                                                 rfs4_dbe_destroy(entry);
 894                                         } else
 895                                                 mutex_exit(entry->dbe_lock);
 896                                 }
 897                         }
 898                         rw_exit(bp->dbk_lock);
 899                         /*
 900                          * delay slightly if there is more work to do
 901                          * with the expectation that other reaper
 902                          * threads are freeing data structures as well
 903                          * and in turn will reduce ref counts on
 904                          * entries in this table allowing them to be
 905                          * released.  This is only done in the
 906                          * instance that the tables are being shut down.
 907                          */
 908                         if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
 909                                 delay(hz/100);
 910                                 retries++;
 911                         }
 912                 /*
 913                  * If this is a table shutdown, retry this bucket until it
 914                  * is empty or 5 delayed retries have been used up.
 915                  */
 916                 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
 917
                     /* Outside of shutdown, stop once the requested quota is met. */
 918                 if (!table->dbt_reaper_shutdown && desired && count >= desired)
 919                         break;
 920         }
 921
 922         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 923             (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
 924             count, cache_time, table->dbt_name));
 925 }
 926
     /*
      * Body of a table's reaper thread; arg is the rfs4_table_t to service.
      *
      * The thread repeatedly waits on dbt_reaper_wait for up to dbt_id_reap
      * seconds and then calls rfs4_dbe_reap() on the table.  The loop ends
      * when the wait returns 0 or dbt_reaper_shutdown is set.  Because the
      * reap call precedes the loop test, one reap still runs on the wakeup
      * that ends the loop.  On the way out the thread decrements the
      * database's db_shutdown_count, signals db_shutdown_wait and exits via
      * zthread_exit().
      */
 927 static void
 928 reaper_thread(caddr_t *arg)
 929 {
 930         rfs4_table_t    *table = (rfs4_table_t *)arg;
 931         clock_t          rc;
 932
 933         NFS4_DEBUG(table->dbt_debug,
 934             (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
 935
 936         CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,

 940         do {
                     /*
                      * The timed wait is bracketed by the CPR SAFE_BEGIN and
                      * SAFE_END macros.  NOTE(review): presumably this marks
                      * the thread as safe to suspend while it is blocked;
                      * confirm against the callb CPR macro documentation.
                      */
 941                 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
 942                 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
 943                     &table->dbt_reaper_cv_lock,
 944                     SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
 945                 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
 946                     &table->dbt_reaper_cv_lock);
                     /* A desired count of 0 places no quota on this reap. */
 947                 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
                     /*
                      * NOTE(review): rc == 0 presumably means the wait was
                      * interrupted by a signal; confirm against the
                      * cv_reltimedwait_sig documentation.
                      */
 948         } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
 949
 950         CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
 951
 952         NFS4_DEBUG(table->dbt_debug,
 953             (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
 954
 955         /* Notify the database shutdown processing that the table is shutdown */
 956         mutex_enter(table->dbt_db->db_lock);
 957         table->dbt_db->db_shutdown_count--;
 958         cv_signal(&table->dbt_db->db_shutdown_wait);
 959         mutex_exit(table->dbt_db->db_lock);
 960         zthread_exit();
 961 }
 962
     /*
      * Start the reaper thread for `table` by calling zthread_create() with
      * reaper_thread() as the entry point, the table as its argument and
      * minclsyspri as the priority.
      *
      * Tables whose dbt_max_cache_time is 0 are skipped and get no reaper.
      * The value returned by zthread_create() is discarded.
      */
 963 static void
 964 rfs4_start_reaper(rfs4_table_t *table)
 965 {
 966         if (table->dbt_max_cache_time == 0)
 967                 return;
 968
 969         (void) zthread_create(NULL, 0, reaper_thread, table, 0,
 970             minclsyspri);
 971 }
 972 
 973 #ifdef DEBUG
     /*
      * DEBUG-only helper: log an entry's address and the name of the table
      * it belongs to, followed by its reference count and id, via cmn_err().
      */
 974 void
 975 rfs4_dbe_debug(rfs4_dbe_t *entry)
 976 {
 977         cmn_err(CE_NOTE, "Entry %p from table %s",
 978             (void *)entry, entry->dbe_table->dbt_name);
 979         cmn_err(CE_CONT, "\trefcnt = %d id = %d",
 980             entry->dbe_refcnt, entry->dbe_id);
 981 }
 982 #endif
 |