Print this page
NEX-20260 NFS hung in transitional state when RSF marks it maintenance
NEX-20423 NFSv4 state database entry locking is not always used around reference count.
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-16452 NFS server in a zone state database needs to be per zone
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
@@ -16,20 +16,24 @@
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2019 Nexenta Systems, Inc.
+ */
+
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/disp.h>
#include <sys/id_space.h>
-#include <sys/atomic.h>
#include <rpc/rpc.h>
#include <nfs/nfs4.h>
#include <nfs/nfs4_db_impl.h>
#include <sys/sdt.h>
@@ -58,20 +62,33 @@
}
void
rfs4_dbe_hold(rfs4_dbe_t *entry)
{
- atomic_inc_32(&entry->dbe_refcnt);
+ if (!MUTEX_HELD(entry->dbe_lock)) {
+ mutex_enter(entry->dbe_lock);
+ entry->dbe_refcnt++;
+ mutex_exit(entry->dbe_lock);
+ } else {
+ entry->dbe_refcnt++;
+ }
}
/*
* rfs4_dbe_rele_nolock only decrements the reference count of the entry.
*/
void
rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
{
- atomic_dec_32(&entry->dbe_refcnt);
+ if (!MUTEX_HELD(entry->dbe_lock)) {
+ ASSERT(entry->dbe_refcnt > 0);
+ mutex_enter(entry->dbe_lock);
+ entry->dbe_refcnt--;
+ mutex_exit(entry->dbe_lock);
+ } else {
+ entry->dbe_refcnt--;
+ }
}
uint32_t
rfs4_dbe_refcnt(rfs4_dbe_t *entry)
@@ -84,12 +101,19 @@
* Caller does not want this entry to be found any longer
*/
void
rfs4_dbe_invalidate(rfs4_dbe_t *entry)
{
+ if (!MUTEX_HELD(entry->dbe_lock)) {
+ mutex_enter(entry->dbe_lock);
entry->dbe_invalid = TRUE;
entry->dbe_skipsearch = TRUE;
+ mutex_exit(entry->dbe_lock);
+ } else {
+ entry->dbe_invalid = TRUE;
+ entry->dbe_skipsearch = TRUE;
+ }
}
/*
* Is this entry invalid?
*/
@@ -127,11 +151,11 @@
void
rfs4_dbe_rele(rfs4_dbe_t *entry)
{
mutex_enter(entry->dbe_lock);
ASSERT(entry->dbe_refcnt > 1);
- atomic_dec_32(&entry->dbe_refcnt);
+ entry->dbe_refcnt--;
entry->dbe_time_rele = gethrestime_sec();
mutex_exit(entry->dbe_lock);
}
void
@@ -247,10 +271,54 @@
mutex_destroy(db->db_lock);
kmem_free(db, sizeof (rfs4_database_t));
}
+/*
+ * Used to get the correct kmem_cache database for the state table being
+ * created.
+ * Helper function for rfs4_table_create
+ */
+static kmem_cache_t *
+get_db_mem_cache(char *name)
+{
+ int i;
+
+ for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
+ if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
+ return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
+ }
+ /*
+ * There is no associated kmem cache for this NFS4 server state
+ * table name
+ */
+ return (NULL);
+}
+
+/*
+ * Used to initialize the global NFSv4 server state database.
+ * Helper funtion for rfs4_state_g_init and called when module is loaded.
+ */
+kmem_cache_t *
+/* CSTYLED */
+nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
+{
+ kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
+ sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
+ 0,
+ rfs4_dbe_kmem_constructor,
+ rfs4_dbe_kmem_destructor,
+ NULL,
+ NULL,
+ NULL,
+ 0);
+ (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
+ strlen(cache_name) + 1);
+ rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
+ return (mem_cache);
+}
+
rfs4_table_t *
rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
void (*destroy)(rfs4_entry_t),
bool_t (*expiry)(rfs4_entry_t),
@@ -302,19 +370,15 @@
table->dbt_maxentries = maxentries;
table->dbt_create = create;
table->dbt_destroy = destroy;
table->dbt_expiry = expiry;
- table->dbt_mem_cache = kmem_cache_create(cache_name,
- sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
- 0,
- rfs4_dbe_kmem_constructor,
- rfs4_dbe_kmem_destructor,
- NULL,
- table,
- NULL,
- 0);
+ /*
+ * get the correct kmem_cache for this table type based on the name.
+ */
+ table->dbt_mem_cache = get_db_mem_cache(cache_name);
+
kmem_free(cache_name, len+13);
table->dbt_debug = db->db_debug_flags;
mutex_enter(db->db_lock);
@@ -362,11 +426,11 @@
cv_destroy(&table->dbt_reaper_wait);
kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
if (table->dbt_id_space)
id_space_destroy(table->dbt_id_space);
- kmem_cache_destroy(table->dbt_mem_cache);
+ table->dbt_mem_cache = NULL;
kmem_free(table, sizeof (rfs4_table_t));
}
rfs4_index_t *
rfs4_index_create(rfs4_table_t *table, char *keyname,
@@ -681,16 +745,18 @@
/*ARGSUSED*/
boolean_t
rfs4_cpr_callb(void *arg, int code)
{
- rfs4_table_t *table = rfs4_client_tab;
rfs4_bucket_t *buckets, *bp;
rfs4_link_t *l;
rfs4_client_t *cp;
int i;
+ nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
+ rfs4_table_t *table = nsrv4->rfs4_client_tab;
+
/*
* We get called for Suspend and Resume events.
* For the suspend case we simply don't care! Nor do we care if
* there are no clients.
*/
@@ -773,29 +839,36 @@
(CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
desired, cache_time, table->dbt_name));
/* Walk the buckets looking for entries to release/destroy */
for (i = 0; i < table->dbt_len; i++) {
+ int retries = 0;
bp = &buckets[i];
do {
found = FALSE;
rw_enter(bp->dbk_lock, RW_READER);
for (l = bp->dbk_head; l; l = l->next) {
entry = l->entry;
+ mutex_enter(entry->dbe_lock);
+ ASSERT(entry->dbe_refcnt != 0);
/*
* Examine an entry. Ref count of 1 means
* that the only reference is for the hash
* table reference.
*/
- if (entry->dbe_refcnt != 1)
+ if (entry->dbe_refcnt != 1) {
+#ifdef DEBUG
+ rfs4_dbe_debug(entry);
+#endif
+ mutex_exit(entry->dbe_lock);
continue;
- mutex_enter(entry->dbe_lock);
+ }
if ((entry->dbe_refcnt == 1) &&
(table->dbt_reaper_shutdown ||
table->dbt_expiry == NULL ||
(*table->dbt_expiry)(entry->dbe_data))) {
- entry->dbe_refcnt--;
+ rfs4_dbe_rele_nolock(entry);
count++;
found = TRUE;
}
mutex_exit(entry->dbe_lock);
}
@@ -808,19 +881,22 @@
l = bp->dbk_head;
while (l) {
t = l;
entry = t->entry;
l = l->next;
+ mutex_enter(entry->dbe_lock);
if (entry->dbe_refcnt == 0) {
DEQUEUE(bp->dbk_head, t);
+ mutex_exit(entry->dbe_lock);
t->next = NULL;
t->prev = NULL;
INVALIDATE_ADDR(t->entry);
rfs4_dbe_destroy(entry);
+ } else
+ mutex_exit(entry->dbe_lock);
}
}
- }
rw_exit(bp->dbk_lock);
/*
* delay slightly if there is more work to do
* with the expectation that other reaper
* threads are freeing data structures as well
@@ -827,17 +903,19 @@
* and in turn will reduce ref counts on
* entries in this table allowing them to be
* released. This is only done in the
* instance that the tables are being shut down.
*/
- if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
+ if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
delay(hz/100);
+ retries++;
+ }
/*
* If this is a table shutdown, keep going until
* everything is gone
*/
- } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
+ } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
if (!table->dbt_reaper_shutdown && desired && count >= desired)
break;
}
@@ -877,19 +955,20 @@
/* Notify the database shutdown processing that the table is shutdown */
mutex_enter(table->dbt_db->db_lock);
table->dbt_db->db_shutdown_count--;
cv_signal(&table->dbt_db->db_shutdown_wait);
mutex_exit(table->dbt_db->db_lock);
+ zthread_exit();
}
static void
rfs4_start_reaper(rfs4_table_t *table)
{
if (table->dbt_max_cache_time == 0)
return;
- (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
+ (void) zthread_create(NULL, 0, reaper_thread, table, 0,
minclsyspri);
}
#ifdef DEBUG
void