Print this page
NEX-20260 NFS hung in transitional state when RSF marks it maintenance
NEX-20423 NFSv4 state database entry locking is not always used around reference count.
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-16452 NFS server in a zone state database needs to be per zone
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
*** 16,35 ****
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/disp.h>
#include <sys/id_space.h>
- #include <sys/atomic.h>
#include <rpc/rpc.h>
#include <nfs/nfs4.h>
#include <nfs/nfs4_db_impl.h>
#include <sys/sdt.h>
--- 16,39 ----
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+ /*
+ * Copyright 2019 Nexenta Systems, Inc.
+ */
+
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/disp.h>
#include <sys/id_space.h>
#include <rpc/rpc.h>
#include <nfs/nfs4.h>
#include <nfs/nfs4_db_impl.h>
#include <sys/sdt.h>
*** 58,77 ****
}
void
rfs4_dbe_hold(rfs4_dbe_t *entry)
{
! atomic_inc_32(&entry->dbe_refcnt);
}
/*
* rfs4_dbe_rele_nolock only decrements the reference count of the entry.
*/
void
rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
{
! atomic_dec_32(&entry->dbe_refcnt);
}
uint32_t
rfs4_dbe_refcnt(rfs4_dbe_t *entry)
--- 62,94 ----
}
/*
* Take one additional reference on a database entry.
*
* dbe_refcnt is incremented under dbe_lock.  The lock is acquired (and
* released) here unless MUTEX_HELD() reports that it is already held, in
* which case the existing hold is relied upon and left in place.
*/
void
rfs4_dbe_hold(rfs4_dbe_t *entry)
{
! if (!MUTEX_HELD(entry->dbe_lock)) {
! mutex_enter(entry->dbe_lock);
! entry->dbe_refcnt++;
! mutex_exit(entry->dbe_lock);
! } else {
! entry->dbe_refcnt++;
! }
}
/*
* rfs4_dbe_rele_nolock only decrements the reference count of the entry.
*
* The count is changed under dbe_lock: the lock is taken (and dropped)
* here unless MUTEX_HELD() reports that it is already held, in which case
* the existing hold is relied upon and left in place.
*/
void
rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
{
! int locked_here = !MUTEX_HELD(entry->dbe_lock);
!
! if (locked_here)
! mutex_enter(entry->dbe_lock);
! /*
! * Check for underflow only once the lock is held, and on both paths.
! * An unlocked read of dbe_refcnt could race with a concurrent update.
! */
! ASSERT(entry->dbe_refcnt > 0);
! entry->dbe_refcnt--;
! if (locked_here)
! mutex_exit(entry->dbe_lock);
}
uint32_t
rfs4_dbe_refcnt(rfs4_dbe_t *entry)
*** 84,95 ****
--- 101,119 ----
* Caller does not want this entry to be found any longer
*/
void
rfs4_dbe_invalidate(rfs4_dbe_t *entry)
{
/*
* Set dbe_invalid and dbe_skipsearch under dbe_lock.  The lock is taken
* here only when MUTEX_HELD() says it is not already held; otherwise the
* existing hold covers the update and is left in place.
*/
+ if (!MUTEX_HELD(entry->dbe_lock)) {
+ mutex_enter(entry->dbe_lock);
entry->dbe_invalid = TRUE;
entry->dbe_skipsearch = TRUE;
+ mutex_exit(entry->dbe_lock);
+ } else {
+ entry->dbe_invalid = TRUE;
+ entry->dbe_skipsearch = TRUE;
+ }
}
/*
* Is this entry invalid?
*/
*** 127,137 ****
void
rfs4_dbe_rele(rfs4_dbe_t *entry)
{
mutex_enter(entry->dbe_lock);
ASSERT(entry->dbe_refcnt > 1);
! atomic_dec_32(&entry->dbe_refcnt);
entry->dbe_time_rele = gethrestime_sec();
mutex_exit(entry->dbe_lock);
}
void
--- 151,161 ----
/*
* Drop one reference on the entry and record the time of the release in
* dbe_time_rele.  Both updates are made while holding dbe_lock, which
* this function acquires itself.
*
* The ASSERT documents that at least one other reference is expected to
* remain after this call.
*/
void
rfs4_dbe_rele(rfs4_dbe_t *entry)
{
mutex_enter(entry->dbe_lock);
ASSERT(entry->dbe_refcnt > 1);
! entry->dbe_refcnt--;
entry->dbe_time_rele = gethrestime_sec();
mutex_exit(entry->dbe_lock);
}
void
*** 247,256 ****
--- 271,324 ----
mutex_destroy(db->db_lock);
kmem_free(db, sizeof (rfs4_database_t));
}
+ /*
+ * Look up the kmem cache registered under the given state table name.
+ * Helper function for rfs4_table_create.
+ *
+ * Scans the first RFS4_DB_MEM_CACHE_NUM slots of rfs4_db_mem_cache_table
+ * for an entry whose r_db_name compares equal to `name' and returns that
+ * entry's r_db_mem_cache, or NULL when no entry matches.
+ */
+ static kmem_cache_t *
+ get_db_mem_cache(char *name)
+ {
+ int i;
+
+ for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
+ if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
+ return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
+ }
+ /*
+ * There is no associated kmem cache for this NFS4 server state
+ * table name
+ */
+ return (NULL);
+ }
+
+ /*
+ * Used to initialize the global NFSv4 server state database.
+ * Helper function for rfs4_state_g_init and called when module is loaded.
+ *
+ * Creates the kmem cache named `cache_name' and records it, together
+ * with that name, in slot `idx' of rfs4_db_mem_cache_table.  Each cache
+ * object is sized for one rfs4_dbe_t, `idxcnt' rfs4_link_t entries and
+ * `size' bytes of table-specific data.  Returns the new cache.
+ */
+ kmem_cache_t *
+ nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size,
+ uint32_t idx)
+ {
+ kmem_cache_t *mem_cache;
+
+ /* Lookups only scan RFS4_DB_MEM_CACHE_NUM slots; stay within them. */
+ ASSERT(idx < RFS4_DB_MEM_CACHE_NUM);
+
+ mem_cache = kmem_cache_create(cache_name,
+ sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
+ 0,
+ rfs4_dbe_kmem_constructor,
+ rfs4_dbe_kmem_destructor,
+ NULL,
+ NULL,
+ NULL,
+ 0);
+ /*
+ * Bound the copy by the destination buffer rather than by the source
+ * length, so an over-long name is truncated instead of overrunning
+ * the slot.  NOTE(review): assumes r_db_name is a fixed-size char
+ * array -- confirm against its declaration.
+ */
+ (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
+ sizeof (rfs4_db_mem_cache_table[idx].r_db_name));
+ rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
+ return (mem_cache);
+ }
+
rfs4_table_t *
rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
void (*destroy)(rfs4_entry_t),
bool_t (*expiry)(rfs4_entry_t),
*** 302,320 ****
table->dbt_maxentries = maxentries;
table->dbt_create = create;
table->dbt_destroy = destroy;
table->dbt_expiry = expiry;
! table->dbt_mem_cache = kmem_cache_create(cache_name,
! sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
! 0,
! rfs4_dbe_kmem_constructor,
! rfs4_dbe_kmem_destructor,
! NULL,
! table,
! NULL,
! 0);
kmem_free(cache_name, len+13);
table->dbt_debug = db->db_debug_flags;
mutex_enter(db->db_lock);
--- 370,384 ----
table->dbt_maxentries = maxentries;
table->dbt_create = create;
table->dbt_destroy = destroy;
table->dbt_expiry = expiry;
! /*
! * get the correct kmem_cache for this table type based on the name.
! */
! table->dbt_mem_cache = get_db_mem_cache(cache_name);
!
kmem_free(cache_name, len+13);
table->dbt_debug = db->db_debug_flags;
mutex_enter(db->db_lock);
*** 362,372 ****
cv_destroy(&table->dbt_reaper_wait);
kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
if (table->dbt_id_space)
id_space_destroy(table->dbt_id_space);
! kmem_cache_destroy(table->dbt_mem_cache);
kmem_free(table, sizeof (rfs4_table_t));
}
rfs4_index_t *
rfs4_index_create(rfs4_table_t *table, char *keyname,
--- 426,436 ----
cv_destroy(&table->dbt_reaper_wait);
kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
if (table->dbt_id_space)
id_space_destroy(table->dbt_id_space);
! table->dbt_mem_cache = NULL;
kmem_free(table, sizeof (rfs4_table_t));
}
rfs4_index_t *
rfs4_index_create(rfs4_table_t *table, char *keyname,
*** 681,696 ****
/*ARGSUSED*/
boolean_t
rfs4_cpr_callb(void *arg, int code)
{
- rfs4_table_t *table = rfs4_client_tab;
rfs4_bucket_t *buckets, *bp;
rfs4_link_t *l;
rfs4_client_t *cp;
int i;
/*
* We get called for Suspend and Resume events.
* For the suspend case we simply don't care! Nor do we care if
* there are no clients.
*/
--- 745,762 ----
/*ARGSUSED*/
boolean_t
rfs4_cpr_callb(void *arg, int code)
{
rfs4_bucket_t *buckets, *bp;
rfs4_link_t *l;
rfs4_client_t *cp;
int i;
+ nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
+ rfs4_table_t *table = nsrv4->rfs4_client_tab;
+
/*
* We get called for Suspend and Resume events.
* For the suspend case we simply don't care! Nor do we care if
* there are no clients.
*/
*** 773,801 ****
(CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
desired, cache_time, table->dbt_name));
/* Walk the buckets looking for entries to release/destroy */
for (i = 0; i < table->dbt_len; i++) {
bp = &buckets[i];
do {
found = FALSE;
rw_enter(bp->dbk_lock, RW_READER);
for (l = bp->dbk_head; l; l = l->next) {
entry = l->entry;
/*
* Examine an entry. Ref count of 1 means
* that the only reference is for the hash
* table reference.
*/
! if (entry->dbe_refcnt != 1)
continue;
! mutex_enter(entry->dbe_lock);
if ((entry->dbe_refcnt == 1) &&
(table->dbt_reaper_shutdown ||
table->dbt_expiry == NULL ||
(*table->dbt_expiry)(entry->dbe_data))) {
! entry->dbe_refcnt--;
count++;
found = TRUE;
}
mutex_exit(entry->dbe_lock);
}
--- 839,874 ----
(CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
desired, cache_time, table->dbt_name));
/* Walk the buckets looking for entries to release/destroy */
for (i = 0; i < table->dbt_len; i++) {
+ int retries = 0;
bp = &buckets[i];
do {
found = FALSE;
rw_enter(bp->dbk_lock, RW_READER);
for (l = bp->dbk_head; l; l = l->next) {
entry = l->entry;
+ mutex_enter(entry->dbe_lock);
+ ASSERT(entry->dbe_refcnt != 0);
/*
* Examine an entry. Ref count of 1 means
* that the only reference is for the hash
* table reference.
*/
! if (entry->dbe_refcnt != 1) {
! #ifdef DEBUG
! rfs4_dbe_debug(entry);
! #endif
! mutex_exit(entry->dbe_lock);
continue;
! }
if ((entry->dbe_refcnt == 1) &&
(table->dbt_reaper_shutdown ||
table->dbt_expiry == NULL ||
(*table->dbt_expiry)(entry->dbe_data))) {
! rfs4_dbe_rele_nolock(entry);
count++;
found = TRUE;
}
mutex_exit(entry->dbe_lock);
}
*** 808,826 ****
l = bp->dbk_head;
while (l) {
t = l;
entry = t->entry;
l = l->next;
if (entry->dbe_refcnt == 0) {
DEQUEUE(bp->dbk_head, t);
t->next = NULL;
t->prev = NULL;
INVALIDATE_ADDR(t->entry);
rfs4_dbe_destroy(entry);
}
}
- }
rw_exit(bp->dbk_lock);
/*
* delay slightly if there is more work to do
* with the expectation that other reaper
* threads are freeing data structures as well
--- 881,902 ----
l = bp->dbk_head;
while (l) {
t = l;
entry = t->entry;
l = l->next;
+ mutex_enter(entry->dbe_lock);
if (entry->dbe_refcnt == 0) {
DEQUEUE(bp->dbk_head, t);
+ mutex_exit(entry->dbe_lock);
t->next = NULL;
t->prev = NULL;
INVALIDATE_ADDR(t->entry);
rfs4_dbe_destroy(entry);
+ } else
+ mutex_exit(entry->dbe_lock);
}
}
rw_exit(bp->dbk_lock);
/*
* delay slightly if there is more work to do
* with the expectation that other reaper
* threads are freeing data structures as well
*** 827,843 ****
* and in turn will reduce ref counts on
* entries in this table allowing them to be
* released. This is only done in the
* instance that the tables are being shut down.
*/
! if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
delay(hz/100);
/*
* If this is a table shutdown, keep going until
* everything is gone
*/
! } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
if (!table->dbt_reaper_shutdown && desired && count >= desired)
break;
}
--- 903,921 ----
* and in turn will reduce ref counts on
* entries in this table allowing them to be
* released. This is only done in the
* instance that the tables are being shut down.
*/
! if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
delay(hz/100);
+ retries++;
+ }
/*
* If this is a table shutdown, keep going until
* everything is gone
*/
! } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
if (!table->dbt_reaper_shutdown && desired && count >= desired)
break;
}
*** 877,895 ****
/* Notify the database shutdown processing that the table is shutdown */
mutex_enter(table->dbt_db->db_lock);
table->dbt_db->db_shutdown_count--;
cv_signal(&table->dbt_db->db_shutdown_wait);
mutex_exit(table->dbt_db->db_lock);
}
static void
rfs4_start_reaper(rfs4_table_t *table)
{
if (table->dbt_max_cache_time == 0)
return;
! (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
minclsyspri);
}
#ifdef DEBUG
void
--- 955,974 ----
/* Notify the database shutdown processing that the table is shutdown */
mutex_enter(table->dbt_db->db_lock);
table->dbt_db->db_shutdown_count--;
cv_signal(&table->dbt_db->db_shutdown_wait);
mutex_exit(table->dbt_db->db_lock);
+ zthread_exit();
}
/*
* Start the reaper thread (reaper_thread) for a table, passing the table
* as its argument.  Nothing is started when the table's
* dbt_max_cache_time is zero.  The value returned by zthread_create() is
* deliberately discarded.
*/
static void
rfs4_start_reaper(rfs4_table_t *table)
{
if (table->dbt_max_cache_time == 0)
return;
! (void) zthread_create(NULL, 0, reaper_thread, table, 0,
minclsyspri);
}
#ifdef DEBUG
void