7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2019 Nexenta Systems, Inc.
  28  */
  29 
  30 #include <sys/systm.h>
  31 #include <sys/cmn_err.h>
  32 #include <sys/kmem.h>
  33 #include <sys/disp.h>
  34 #include <sys/id_space.h>
  35 #include <rpc/rpc.h>
  36 #include <nfs/nfs4.h>
  37 #include <nfs/nfs4_db_impl.h>
  38 #include <sys/sdt.h>
  39 
  40 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
  41 
  42 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
  43 static void rfs4_dbe_destroy(rfs4_dbe_t *);
  44 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
  45 static void rfs4_start_reaper(rfs4_table_t *);
  46 
  47 /*
  48  * t_lowat - integer percentage of table entries        /etc/system only
  49  * t_hiwat - integer percentage of table entries        /etc/system only
  50  * t_lreap - integer percentage of table reap time      mdb or /etc/system
  51  * t_hreap - integer percentage of table reap time      mdb or /etc/system
  52  */
  53 uint32_t        t_lowat = 50;   /* reap at t_lreap when id's in use hit 50% */
  54 uint32_t        t_hiwat = 75;   /* reap at t_hreap when id's in use hit 75% */
  55 time_t          t_lreap = 50;   /* default to 50% of table's reap interval */
  56 time_t          t_hreap = 10;   /* default to 10% of table's reap interval */
  57 
  58 id_t
  59 rfs4_dbe_getid(rfs4_dbe_t *entry)
  60 {
  61         return (entry->dbe_id);
  62 }
  63 
  64 void
  65 rfs4_dbe_hold(rfs4_dbe_t *entry)
  66 {
  67         if (!MUTEX_HELD(entry->dbe_lock)) {
  68                 mutex_enter(entry->dbe_lock);
  69                 entry->dbe_refcnt++;
  70                 mutex_exit(entry->dbe_lock);
  71         } else {
  72                 entry->dbe_refcnt++;
  73         }
  74 }
  75 
  76 /*
  77  * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
  78  */
  79 void
  80 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  81 {
  82         if (!MUTEX_HELD(entry->dbe_lock)) {
  83                 ASSERT(entry->dbe_refcnt > 0);
  84                 mutex_enter(entry->dbe_lock);
  85                 entry->dbe_refcnt--;
  86                 mutex_exit(entry->dbe_lock);
  87         } else {
  88                 entry->dbe_refcnt--;
  89         }
  90 }
  91 
  92 
  93 uint32_t
  94 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
  95 {
  96         return (entry->dbe_refcnt);
  97 }
  98 
  99 /*
 100  * Mark an entry such that the dbsearch will skip it.
 101  * Caller does not want this entry to be found any longer
 102  */
 103 void
 104 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
 105 {
 106         if (!MUTEX_HELD(entry->dbe_lock)) {
 107                 mutex_enter(entry->dbe_lock);
 108                 entry->dbe_invalid = TRUE;
 109                 entry->dbe_skipsearch = TRUE;
 110                 mutex_exit(entry->dbe_lock);
 111         } else {
 112                 entry->dbe_invalid = TRUE;
 113                 entry->dbe_skipsearch = TRUE;
 114         }
 115 }
 116 
 117 /*
 118  * Is this entry invalid?
 119  */
 120 bool_t
 121 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
 122 {
 123         return (entry->dbe_invalid);
 124 }
 125 
 126 time_t
 127 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
 128 {
 129         return (entry->dbe_time_rele);
 130 }
 131 
 132 /*
 133  * Use these to temporarily hide/unhide a db entry.
 134  */
 
 136 rfs4_dbe_hide(rfs4_dbe_t *entry)
 137 {
 138         rfs4_dbe_lock(entry);
 139         entry->dbe_skipsearch = TRUE;
 140         rfs4_dbe_unlock(entry);
 141 }
 142 
 143 void
 144 rfs4_dbe_unhide(rfs4_dbe_t *entry)
 145 {
 146         rfs4_dbe_lock(entry);
 147         entry->dbe_skipsearch = FALSE;
 148         rfs4_dbe_unlock(entry);
 149 }
 150 
 151 void
 152 rfs4_dbe_rele(rfs4_dbe_t *entry)
 153 {
 154         mutex_enter(entry->dbe_lock);
 155         ASSERT(entry->dbe_refcnt > 1);
 156         entry->dbe_refcnt--;
 157         entry->dbe_time_rele = gethrestime_sec();
 158         mutex_exit(entry->dbe_lock);
 159 }
 160 
 161 void
 162 rfs4_dbe_lock(rfs4_dbe_t *entry)
 163 {
 164         mutex_enter(entry->dbe_lock);
 165 }
 166 
 167 void
 168 rfs4_dbe_unlock(rfs4_dbe_t *entry)
 169 {
 170         mutex_exit(entry->dbe_lock);
 171 }
 172 
 173 bool_t
 174 rfs4_dbe_islocked(rfs4_dbe_t *entry)
 175 {
 176         return (mutex_owned(entry->dbe_lock));
 
 824 }
 825 
 826 
 827 static void
 828 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
 829 {
 830         rfs4_index_t *idx = table->dbt_indices;
 831         rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
 832         rfs4_link_t *l, *t;
 833         rfs4_dbe_t *entry;
 834         bool_t found;
 835         int i;
 836         int count = 0;
 837 
 838         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 839             (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
 840             desired, cache_time, table->dbt_name));
 841 
 842         /* Walk the buckets looking for entries to release/destroy */
 843         for (i = 0; i < table->dbt_len; i++) {
 844                 int retries = 0;
 845                 bp = &buckets[i];
 846                 do {
 847                         found = FALSE;
 848                         rw_enter(bp->dbk_lock, RW_READER);
 849                         for (l = bp->dbk_head; l; l = l->next) {
 850                                 entry = l->entry;
 851                                 mutex_enter(entry->dbe_lock);
 852                                 ASSERT(entry->dbe_refcnt != 0);
 853                                 /*
 854                                  * Examine an entry.  Ref count of 1 means
 855                                  * that the only reference is for the hash
 856                                  * table reference.
 857                                  */
 858                                 if (entry->dbe_refcnt != 1) {
 859                                         mutex_exit(entry->dbe_lock);
 860                                         continue;
 861                                 }
 862                                 if ((entry->dbe_refcnt == 1) &&
 863                                     (table->dbt_reaper_shutdown ||
 864                                     table->dbt_expiry == NULL ||
 865                                     (*table->dbt_expiry)(entry->dbe_data))) {
 866                                         rfs4_dbe_rele_nolock(entry);
 867                                         count++;
 868                                         found = TRUE;
 869                                 }
 870                                 mutex_exit(entry->dbe_lock);
 871                         }
 872                         if (found) {
 873                                 if (!rw_tryupgrade(bp->dbk_lock)) {
 874                                         rw_exit(bp->dbk_lock);
 875                                         rw_enter(bp->dbk_lock, RW_WRITER);
 876                                 }
 877 
 878                                 l = bp->dbk_head;
 879                                 while (l) {
 880                                         t = l;
 881                                         entry = t->entry;
 882                                         l = l->next;
 883                                         mutex_enter(entry->dbe_lock);
 884                                         if (entry->dbe_refcnt == 0) {
 885                                                 DEQUEUE(bp->dbk_head, t);
 886                                                 mutex_exit(entry->dbe_lock);
 887                                                 t->next = NULL;
 888                                                 t->prev = NULL;
 889                                                 INVALIDATE_ADDR(t->entry);
 890                                                 rfs4_dbe_destroy(entry);
 891                                         } else
 892                                                 mutex_exit(entry->dbe_lock);
 893                                 }
 894                         }
 895                         rw_exit(bp->dbk_lock);
 896                         /*
 897                          * delay slightly if there is more work to do
 898                          * with the expectation that other reaper
 899                          * threads are freeing data structures as well
 900                          * and in turn will reduce ref counts on
 901                          * entries in this table allowing them to be
 902                          * released.  This is only done in the
 903                          * instance that the tables are being shut down.
 904                          */
 905                         if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
 906                                 delay(hz/100);
 907                                 retries++;
 908                         }
 909                 /*
 910                  * If this is a table shutdown, keep going until
 911                  * everything is gone
 912                  */
 913                 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
 914 
 915                 if (!table->dbt_reaper_shutdown && desired && count >= desired)
 916                         break;
 917         }
 918 
 919         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 920             (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
 921             count, cache_time, table->dbt_name));
 922 }
 923 
 924 static void
 925 reaper_thread(caddr_t *arg)
 926 {
 927         rfs4_table_t    *table = (rfs4_table_t *)arg;
 928         clock_t          rc;
 929 
 930         NFS4_DEBUG(table->dbt_debug,
 931             (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
 932 
 933         CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
 
 | 
 
 
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2018 Nexenta Systems, Inc.
  28  */
  29 
  30 #include <sys/systm.h>
  31 #include <sys/cmn_err.h>
  32 #include <sys/kmem.h>
  33 #include <sys/disp.h>
  34 #include <sys/id_space.h>
  35 #include <sys/atomic.h>
  36 #include <rpc/rpc.h>
  37 #include <nfs/nfs4.h>
  38 #include <nfs/nfs4_db_impl.h>
  39 #include <sys/sdt.h>
  40 
  41 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
  42 
  43 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
  44 static void rfs4_dbe_destroy(rfs4_dbe_t *);
  45 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
  46 static void rfs4_start_reaper(rfs4_table_t *);
  47 
  48 /*
  49  * t_lowat - integer percentage of table entries        /etc/system only
  50  * t_hiwat - integer percentage of table entries        /etc/system only
  51  * t_lreap - integer percentage of table reap time      mdb or /etc/system
  52  * t_hreap - integer percentage of table reap time      mdb or /etc/system
  53  */
  54 uint32_t        t_lowat = 50;   /* reap at t_lreap when id's in use hit 50% */
  55 uint32_t        t_hiwat = 75;   /* reap at t_hreap when id's in use hit 75% */
  56 time_t          t_lreap = 50;   /* default to 50% of table's reap interval */
  57 time_t          t_hreap = 10;   /* default to 10% of table's reap interval */
  58 
  59 id_t
  60 rfs4_dbe_getid(rfs4_dbe_t *entry)
  61 {
  62         return (entry->dbe_id);
  63 }
  64 
  65 void
  66 rfs4_dbe_hold(rfs4_dbe_t *entry)
  67 {
  68         atomic_inc_32(&entry->dbe_refcnt);
  69 }
  70 
  71 /*
  72  * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
  73  */
  74 void
  75 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  76 {
  77         atomic_dec_32(&entry->dbe_refcnt);
  78 }
  79 
  80 
  81 uint32_t
  82 rfs4_dbe_refcnt(rfs4_dbe_t *entry)
  83 {
  84         return (entry->dbe_refcnt);
  85 }
  86 
  87 /*
  88  * Mark an entry such that the dbsearch will skip it.
  89  * Caller does not want this entry to be found any longer
  90  */
  91 void
  92 rfs4_dbe_invalidate(rfs4_dbe_t *entry)
  93 {
  94         entry->dbe_invalid = TRUE;
  95         entry->dbe_skipsearch = TRUE;
  96 }
  97 
  98 /*
  99  * Is this entry invalid?
 100  */
 101 bool_t
 102 rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
 103 {
 104         return (entry->dbe_invalid);
 105 }
 106 
 107 time_t
 108 rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
 109 {
 110         return (entry->dbe_time_rele);
 111 }
 112 
 113 /*
 114  * Use these to temporarily hide/unhide a db entry.
 115  */
 
 117 rfs4_dbe_hide(rfs4_dbe_t *entry)
 118 {
 119         rfs4_dbe_lock(entry);
 120         entry->dbe_skipsearch = TRUE;
 121         rfs4_dbe_unlock(entry);
 122 }
 123 
 124 void
 125 rfs4_dbe_unhide(rfs4_dbe_t *entry)
 126 {
 127         rfs4_dbe_lock(entry);
 128         entry->dbe_skipsearch = FALSE;
 129         rfs4_dbe_unlock(entry);
 130 }
 131 
 132 void
 133 rfs4_dbe_rele(rfs4_dbe_t *entry)
 134 {
 135         mutex_enter(entry->dbe_lock);
 136         ASSERT(entry->dbe_refcnt > 1);
 137         atomic_dec_32(&entry->dbe_refcnt);
 138         entry->dbe_time_rele = gethrestime_sec();
 139         mutex_exit(entry->dbe_lock);
 140 }
 141 
 142 void
 143 rfs4_dbe_lock(rfs4_dbe_t *entry)
 144 {
 145         mutex_enter(entry->dbe_lock);
 146 }
 147 
 148 void
 149 rfs4_dbe_unlock(rfs4_dbe_t *entry)
 150 {
 151         mutex_exit(entry->dbe_lock);
 152 }
 153 
 154 bool_t
 155 rfs4_dbe_islocked(rfs4_dbe_t *entry)
 156 {
 157         return (mutex_owned(entry->dbe_lock));
 
 805 }
 806 
 807 
 808 static void
 809 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
 810 {
 811         rfs4_index_t *idx = table->dbt_indices;
 812         rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
 813         rfs4_link_t *l, *t;
 814         rfs4_dbe_t *entry;
 815         bool_t found;
 816         int i;
 817         int count = 0;
 818 
 819         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 820             (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
 821             desired, cache_time, table->dbt_name));
 822 
 823         /* Walk the buckets looking for entries to release/destroy */
 824         for (i = 0; i < table->dbt_len; i++) {
 825                 bp = &buckets[i];
 826                 do {
 827                         found = FALSE;
 828                         rw_enter(bp->dbk_lock, RW_READER);
 829                         for (l = bp->dbk_head; l; l = l->next) {
 830                                 entry = l->entry;
 831                                 /*
 832                                  * Examine an entry.  Ref count of 1 means
 833                                  * that the only reference is for the hash
 834                                  * table reference.
 835                                  */
 836                                 if (entry->dbe_refcnt != 1)
 837                                         continue;
 838                                 mutex_enter(entry->dbe_lock);
 839                                 if ((entry->dbe_refcnt == 1) &&
 840                                     (table->dbt_reaper_shutdown ||
 841                                     table->dbt_expiry == NULL ||
 842                                     (*table->dbt_expiry)(entry->dbe_data))) {
 843                                         entry->dbe_refcnt--;
 844                                         count++;
 845                                         found = TRUE;
 846                                 }
 847                                 mutex_exit(entry->dbe_lock);
 848                         }
 849                         if (found) {
 850                                 if (!rw_tryupgrade(bp->dbk_lock)) {
 851                                         rw_exit(bp->dbk_lock);
 852                                         rw_enter(bp->dbk_lock, RW_WRITER);
 853                                 }
 854 
 855                                 l = bp->dbk_head;
 856                                 while (l) {
 857                                         t = l;
 858                                         entry = t->entry;
 859                                         l = l->next;
 860                                         if (entry->dbe_refcnt == 0) {
 861                                                 DEQUEUE(bp->dbk_head, t);
 862                                                 t->next = NULL;
 863                                                 t->prev = NULL;
 864                                                 INVALIDATE_ADDR(t->entry);
 865                                                 rfs4_dbe_destroy(entry);
 866                                         }
 867                                 }
 868                         }
 869                         rw_exit(bp->dbk_lock);
 870                         /*
 871                          * delay slightly if there is more work to do
 872                          * with the expectation that other reaper
 873                          * threads are freeing data structures as well
 874                          * and in turn will reduce ref counts on
 875                          * entries in this table allowing them to be
 876                          * released.  This is only done in the
 877                          * instance that the tables are being shut down.
 878                          */
 879                         if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
 880                                 delay(hz/100);
 881                 /*
 882                  * If this is a table shutdown, keep going until
 883                  * everything is gone
 884                  */
 885                 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
 886 
 887                 if (!table->dbt_reaper_shutdown && desired && count >= desired)
 888                         break;
 889         }
 890 
 891         NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
 892             (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
 893             count, cache_time, table->dbt_name));
 894 }
 895 
 896 static void
 897 reaper_thread(caddr_t *arg)
 898 {
 899         rfs4_table_t    *table = (rfs4_table_t *)arg;
 900         clock_t          rc;
 901 
 902         NFS4_DEBUG(table->dbt_debug,
 903             (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
 904 
 905         CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
 
 |