Print this page
NEX-20260 NFS hung in transitional state when RSF marks it maintenance
NEX-20423 NFSv4 state database entry locking is not always used around reference count.
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-16452 NFS server in a zone state database needs to be per zone
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>

*** 16,35 **** * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/systm.h> #include <sys/cmn_err.h> #include <sys/kmem.h> #include <sys/disp.h> #include <sys/id_space.h> - #include <sys/atomic.h> #include <rpc/rpc.h> #include <nfs/nfs4.h> #include <nfs/nfs4_db_impl.h> #include <sys/sdt.h> --- 16,39 ---- * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ + /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ + /* + * Copyright 2019 Nexenta Systems, Inc. + */ + #include <sys/systm.h> #include <sys/cmn_err.h> #include <sys/kmem.h> #include <sys/disp.h> #include <sys/id_space.h> #include <rpc/rpc.h> #include <nfs/nfs4.h> #include <nfs/nfs4_db_impl.h> #include <sys/sdt.h>
*** 58,77 **** } void rfs4_dbe_hold(rfs4_dbe_t *entry) { ! atomic_inc_32(&entry->dbe_refcnt); } /* * rfs4_dbe_rele_nolock only decrements the reference count of the entry. */ void rfs4_dbe_rele_nolock(rfs4_dbe_t *entry) { ! atomic_dec_32(&entry->dbe_refcnt); } uint32_t rfs4_dbe_refcnt(rfs4_dbe_t *entry) --- 62,94 ---- } void rfs4_dbe_hold(rfs4_dbe_t *entry) { ! if (!MUTEX_HELD(entry->dbe_lock)) { ! mutex_enter(entry->dbe_lock); ! entry->dbe_refcnt++; ! mutex_exit(entry->dbe_lock); ! } else { ! entry->dbe_refcnt++; ! } } /* * rfs4_dbe_rele_nolock only decrements the reference count of the entry. */ void rfs4_dbe_rele_nolock(rfs4_dbe_t *entry) { ! if (!MUTEX_HELD(entry->dbe_lock)) { ! ASSERT(entry->dbe_refcnt > 0); ! mutex_enter(entry->dbe_lock); ! entry->dbe_refcnt--; ! mutex_exit(entry->dbe_lock); ! } else { ! entry->dbe_refcnt--; ! } } uint32_t rfs4_dbe_refcnt(rfs4_dbe_t *entry)
*** 84,95 **** --- 101,119 ---- * Caller does not want this entry to be found any longer */ void rfs4_dbe_invalidate(rfs4_dbe_t *entry) { + if (!MUTEX_HELD(entry->dbe_lock)) { + mutex_enter(entry->dbe_lock); entry->dbe_invalid = TRUE; entry->dbe_skipsearch = TRUE; + mutex_exit(entry->dbe_lock); + } else { + entry->dbe_invalid = TRUE; + entry->dbe_skipsearch = TRUE; + } } /* * Is this entry invalid? */
*** 127,137 **** void rfs4_dbe_rele(rfs4_dbe_t *entry) { mutex_enter(entry->dbe_lock); ASSERT(entry->dbe_refcnt > 1); ! atomic_dec_32(&entry->dbe_refcnt); entry->dbe_time_rele = gethrestime_sec(); mutex_exit(entry->dbe_lock); } void --- 151,161 ---- void rfs4_dbe_rele(rfs4_dbe_t *entry) { mutex_enter(entry->dbe_lock); ASSERT(entry->dbe_refcnt > 1); ! entry->dbe_refcnt--; entry->dbe_time_rele = gethrestime_sec(); mutex_exit(entry->dbe_lock); } void
*** 247,256 **** --- 271,324 ---- mutex_destroy(db->db_lock); kmem_free(db, sizeof (rfs4_database_t)); } + /* + * Used to get the correct kmem_cache database for the state table being + * created. + * Helper function for rfs4_table_create + */ + static kmem_cache_t * + get_db_mem_cache(char *name) + { + int i; + + for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) { + if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0) + return (rfs4_db_mem_cache_table[i].r_db_mem_cache); + } + /* + * There is no associated kmem cache for this NFS4 server state + * table name + */ + return (NULL); + } + + /* + * Used to initialize the global NFSv4 server state database. + * Helper function for rfs4_state_g_init and called when module is loaded. + */ + kmem_cache_t * + /* CSTYLED */ + nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx) + { + kmem_cache_t *mem_cache = kmem_cache_create(cache_name, + sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size, + 0, + rfs4_dbe_kmem_constructor, + rfs4_dbe_kmem_destructor, + NULL, + NULL, + NULL, + 0); + (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name, + strlen(cache_name) + 1); + rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache; + return (mem_cache); + } + rfs4_table_t * rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time, uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *), void (*destroy)(rfs4_entry_t), bool_t (*expiry)(rfs4_entry_t),
*** 302,320 **** table->dbt_maxentries = maxentries; table->dbt_create = create; table->dbt_destroy = destroy; table->dbt_expiry = expiry; ! table->dbt_mem_cache = kmem_cache_create(cache_name, ! sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size, ! 0, ! rfs4_dbe_kmem_constructor, ! rfs4_dbe_kmem_destructor, ! NULL, ! table, ! NULL, ! 0); kmem_free(cache_name, len+13); table->dbt_debug = db->db_debug_flags; mutex_enter(db->db_lock); --- 370,384 ---- table->dbt_maxentries = maxentries; table->dbt_create = create; table->dbt_destroy = destroy; table->dbt_expiry = expiry; ! /* ! * get the correct kmem_cache for this table type based on the name. ! */ ! table->dbt_mem_cache = get_db_mem_cache(cache_name); ! kmem_free(cache_name, len+13); table->dbt_debug = db->db_debug_flags; mutex_enter(db->db_lock);
*** 362,372 **** cv_destroy(&table->dbt_reaper_wait); kmem_free(table->dbt_name, strlen(table->dbt_name) + 1); if (table->dbt_id_space) id_space_destroy(table->dbt_id_space); ! kmem_cache_destroy(table->dbt_mem_cache); kmem_free(table, sizeof (rfs4_table_t)); } rfs4_index_t * rfs4_index_create(rfs4_table_t *table, char *keyname, --- 426,436 ---- cv_destroy(&table->dbt_reaper_wait); kmem_free(table->dbt_name, strlen(table->dbt_name) + 1); if (table->dbt_id_space) id_space_destroy(table->dbt_id_space); ! table->dbt_mem_cache = NULL; kmem_free(table, sizeof (rfs4_table_t)); } rfs4_index_t * rfs4_index_create(rfs4_table_t *table, char *keyname,
*** 681,696 **** /*ARGSUSED*/ boolean_t rfs4_cpr_callb(void *arg, int code) { - rfs4_table_t *table = rfs4_client_tab; rfs4_bucket_t *buckets, *bp; rfs4_link_t *l; rfs4_client_t *cp; int i; /* * We get called for Suspend and Resume events. * For the suspend case we simply don't care! Nor do we care if * there are no clients. */ --- 745,762 ---- /*ARGSUSED*/ boolean_t rfs4_cpr_callb(void *arg, int code) { rfs4_bucket_t *buckets, *bp; rfs4_link_t *l; rfs4_client_t *cp; int i; + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + rfs4_table_t *table = nsrv4->rfs4_client_tab; + /* * We get called for Suspend and Resume events. * For the suspend case we simply don't care! Nor do we care if * there are no clients. */
*** 773,801 **** (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s", desired, cache_time, table->dbt_name)); /* Walk the buckets looking for entries to release/destroy */ for (i = 0; i < table->dbt_len; i++) { bp = &buckets[i]; do { found = FALSE; rw_enter(bp->dbk_lock, RW_READER); for (l = bp->dbk_head; l; l = l->next) { entry = l->entry; /* * Examine an entry. Ref count of 1 means * that the only reference is for the hash * table reference. */ ! if (entry->dbe_refcnt != 1) continue; ! mutex_enter(entry->dbe_lock); if ((entry->dbe_refcnt == 1) && (table->dbt_reaper_shutdown || table->dbt_expiry == NULL || (*table->dbt_expiry)(entry->dbe_data))) { ! entry->dbe_refcnt--; count++; found = TRUE; } mutex_exit(entry->dbe_lock); } --- 839,874 ---- (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s", desired, cache_time, table->dbt_name)); /* Walk the buckets looking for entries to release/destroy */ for (i = 0; i < table->dbt_len; i++) { + int retries = 0; bp = &buckets[i]; do { found = FALSE; rw_enter(bp->dbk_lock, RW_READER); for (l = bp->dbk_head; l; l = l->next) { entry = l->entry; + mutex_enter(entry->dbe_lock); + ASSERT(entry->dbe_refcnt != 0); /* * Examine an entry. Ref count of 1 means * that the only reference is for the hash * table reference. */ ! if (entry->dbe_refcnt != 1) { ! #ifdef DEBUG ! rfs4_dbe_debug(entry); ! #endif ! mutex_exit(entry->dbe_lock); continue; ! } if ((entry->dbe_refcnt == 1) && (table->dbt_reaper_shutdown || table->dbt_expiry == NULL || (*table->dbt_expiry)(entry->dbe_data))) { ! rfs4_dbe_rele_nolock(entry); count++; found = TRUE; } mutex_exit(entry->dbe_lock); }
*** 808,826 **** l = bp->dbk_head; while (l) { t = l; entry = t->entry; l = l->next; if (entry->dbe_refcnt == 0) { DEQUEUE(bp->dbk_head, t); t->next = NULL; t->prev = NULL; INVALIDATE_ADDR(t->entry); rfs4_dbe_destroy(entry); } } - } rw_exit(bp->dbk_lock); /* * delay slightly if there is more work to do * with the expectation that other reaper * threads are freeing data structures as well --- 881,902 ---- l = bp->dbk_head; while (l) { t = l; entry = t->entry; l = l->next; + mutex_enter(entry->dbe_lock); if (entry->dbe_refcnt == 0) { DEQUEUE(bp->dbk_head, t); + mutex_exit(entry->dbe_lock); t->next = NULL; t->prev = NULL; INVALIDATE_ADDR(t->entry); rfs4_dbe_destroy(entry); + } else + mutex_exit(entry->dbe_lock); } } rw_exit(bp->dbk_lock); /* * delay slightly if there is more work to do * with the expectation that other reaper * threads are freeing data structures as well
*** 827,843 **** * and in turn will reduce ref counts on * entries in this table allowing them to be * released. This is only done in the * instance that the tables are being shut down. */ ! if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) delay(hz/100); /* * If this is a table shutdown, keep going until * everything is gone */ ! } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL); if (!table->dbt_reaper_shutdown && desired && count >= desired) break; } --- 903,921 ---- * and in turn will reduce ref counts on * entries in this table allowing them to be * released. This is only done in the * instance that the tables are being shut down. */ ! if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) { delay(hz/100); + retries++; + } /* * If this is a table shutdown, keep going until * everything is gone */ ! } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5); if (!table->dbt_reaper_shutdown && desired && count >= desired) break; }
*** 877,895 **** /* Notify the database shutdown processing that the table is shutdown */ mutex_enter(table->dbt_db->db_lock); table->dbt_db->db_shutdown_count--; cv_signal(&table->dbt_db->db_shutdown_wait); mutex_exit(table->dbt_db->db_lock); } static void rfs4_start_reaper(rfs4_table_t *table) { if (table->dbt_max_cache_time == 0) return; ! (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN, minclsyspri); } #ifdef DEBUG void --- 955,974 ---- /* Notify the database shutdown processing that the table is shutdown */ mutex_enter(table->dbt_db->db_lock); table->dbt_db->db_shutdown_count--; cv_signal(&table->dbt_db->db_shutdown_wait); mutex_exit(table->dbt_db->db_lock); + zthread_exit(); } static void rfs4_start_reaper(rfs4_table_t *table) { if (table->dbt_max_cache_time == 0) return; ! (void) zthread_create(NULL, 0, reaper_thread, table, 0, minclsyspri); } #ifdef DEBUG void