Print this page
NEX-20260 NFS hung in transitional state when RSF marks it maintenance
NEX-20423 NFSv4 state database entry locking is not always used around reference count.
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-16452 NFS server in a zone state database needs to be per zone
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
        
*** 16,35 ****
   * fields enclosed by brackets "[]" replaced with your own identifying
   * information: Portions Copyright [yyyy] [name of copyright owner]
   *
   * CDDL HEADER END
   */
  /*
   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   */
  
  #include <sys/systm.h>
  #include <sys/cmn_err.h>
  #include <sys/kmem.h>
  #include <sys/disp.h>
  #include <sys/id_space.h>
- #include <sys/atomic.h>
  #include <rpc/rpc.h>
  #include <nfs/nfs4.h>
  #include <nfs/nfs4_db_impl.h>
  #include <sys/sdt.h>
  
--- 16,39 ----
   * fields enclosed by brackets "[]" replaced with your own identifying
   * information: Portions Copyright [yyyy] [name of copyright owner]
   *
   * CDDL HEADER END
   */
+ 
  /*
   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   */
  
+ /*
+  * Copyright 2019 Nexenta Systems, Inc.
+  */
+ 
  #include <sys/systm.h>
  #include <sys/cmn_err.h>
  #include <sys/kmem.h>
  #include <sys/disp.h>
  #include <sys/id_space.h>
  #include <rpc/rpc.h>
  #include <nfs/nfs4.h>
  #include <nfs/nfs4_db_impl.h>
  #include <sys/sdt.h>
  
*** 58,77 ****
  }
  
  void
  rfs4_dbe_hold(rfs4_dbe_t *entry)
  {
!         atomic_inc_32(&entry->dbe_refcnt);
  }
  
  /*
   * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
   */
  void
  rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  {
!         atomic_dec_32(&entry->dbe_refcnt);
  }
  
  
  uint32_t
  rfs4_dbe_refcnt(rfs4_dbe_t *entry)
--- 62,94 ----
  }
  
  void
  rfs4_dbe_hold(rfs4_dbe_t *entry)
  {
!         /*
!          * Take one additional reference on the entry.  The increment is
!          * always done with dbe_lock held: the lock is acquired here only
!          * when MUTEX_HELD() reports that it is not already held.
!          *
!          * NOTE(review): MUTEX_HELD() presumably maps to mutex_owned(),
!          * which mutex(9F) describes as intended for ASSERT() use only;
!          * confirm that using it for control flow is acceptable here.
!          */
!         int took_lock = 0;
! 
!         if (!MUTEX_HELD(entry->dbe_lock)) {
!                 mutex_enter(entry->dbe_lock);
!                 took_lock = 1;
!         }
! 
!         entry->dbe_refcnt++;
! 
!         /* Only undo a lock acquisition that was made by this call. */
!         if (took_lock)
!                 mutex_exit(entry->dbe_lock);
  }
  
  /*
!  * rfs4_dbe_rele_nolock only decrements the reference count of the entry;
!  * unlike rfs4_dbe_rele it does not update the release time.
!  *
!  * The decrement is always performed with dbe_lock held.  If MUTEX_HELD()
!  * reports that the lock is already held, the count is dropped directly;
!  * otherwise the lock is taken around the decrement and released again.
!  * On DEBUG kernels the count is asserted to be non-zero, under the lock,
!  * before it is decremented so that an underflow is caught on either path.
   */
  void
  rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
  {
!         if (!MUTEX_HELD(entry->dbe_lock)) {
!                 mutex_enter(entry->dbe_lock);
!                 /* Check the count only once the lock protects it. */
!                 ASSERT(entry->dbe_refcnt > 0);
!                 entry->dbe_refcnt--;
!                 mutex_exit(entry->dbe_lock);
!         } else {
!                 ASSERT(entry->dbe_refcnt > 0);
!                 entry->dbe_refcnt--;
!         }
  }
  
  
  uint32_t
  rfs4_dbe_refcnt(rfs4_dbe_t *entry)
*** 84,95 ****
--- 101,119 ----
   * Caller does not want this entry to be found any longer
   */
  void
  rfs4_dbe_invalidate(rfs4_dbe_t *entry)
  {
+         /*
+          * Set both dbe_invalid and dbe_skipsearch with dbe_lock held.
+          * The lock is acquired here only when MUTEX_HELD() reports that
+          * it is not already held, and is released again only in that
+          * case.
+          */
+         int took_lock = 0;
+ 
+         if (!MUTEX_HELD(entry->dbe_lock)) {
+                 mutex_enter(entry->dbe_lock);
+                 took_lock = 1;
+         }
+ 
          entry->dbe_invalid = TRUE;
          entry->dbe_skipsearch = TRUE;
+ 
+         if (took_lock)
+                 mutex_exit(entry->dbe_lock);
  }
  
  /*
   * Is this entry invalid?
   */
*** 127,137 ****
  void
  rfs4_dbe_rele(rfs4_dbe_t *entry)
  {
          mutex_enter(entry->dbe_lock);
          ASSERT(entry->dbe_refcnt > 1);
!         atomic_dec_32(&entry->dbe_refcnt);
          entry->dbe_time_rele = gethrestime_sec();
          mutex_exit(entry->dbe_lock);
  }
  
  void
--- 151,161 ----
  void
  rfs4_dbe_rele(rfs4_dbe_t *entry)
  {
          /*
           * Drop one reference on the entry and stamp the time of the
           * release; both updates are made while holding dbe_lock.
           */
          mutex_enter(entry->dbe_lock);
          /* The count must still be at least 1 after the decrement below. */
          ASSERT(entry->dbe_refcnt > 1);
!         entry->dbe_refcnt--;
          /* Remember when this release happened (gethrestime_sec() value). */
          entry->dbe_time_rele = gethrestime_sec();
          mutex_exit(entry->dbe_lock);
  }
  
  void
*** 247,256 ****
--- 271,324 ----
  
          mutex_destroy(db->db_lock);
          kmem_free(db, sizeof (rfs4_database_t));
  }
  
+ /*
+  * Look up the kmem cache registered under 'name' in
+  * rfs4_db_mem_cache_table.
+  *
+  * The first RFS4_DB_MEM_CACHE_NUM slots are scanned in order and the
+  * r_db_mem_cache of the first slot whose r_db_name compares equal to
+  * 'name' is returned.  NULL is returned when no slot matches.
+  *
+  * Helper function for rfs4_table_create.
+  */
+ static kmem_cache_t *
+ get_db_mem_cache(char *name)
+ {
+         int idx = 0;
+ 
+         while (idx < RFS4_DB_MEM_CACHE_NUM) {
+                 if (strcmp(name, rfs4_db_mem_cache_table[idx].r_db_name) == 0)
+                         return (rfs4_db_mem_cache_table[idx].r_db_mem_cache);
+                 idx++;
+         }
+ 
+         /* No kmem cache is associated with this state table name. */
+         return (NULL);
+ }
+ 
+ /*
+  * Create the kmem cache for one NFSv4 server state table and record it in
+  * slot 'idx' of rfs4_db_mem_cache_table under the name 'cache_name'.
+  *
+  * Each object in the cache is sized as one rfs4_dbe_t, plus 'idxcnt'
+  * rfs4_link_t structures, plus 'size' additional bytes.
+  *
+  * Helper function for rfs4_state_g_init and called when module is loaded.
+  *
+  * 'idx' is used unchecked as an index into rfs4_db_mem_cache_table, so the
+  * caller must pass a valid slot number.
+  *
+  * Returns the cache returned by kmem_cache_create(), which is also stored
+  * in the table slot.
+  */
+ kmem_cache_t *
+ nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size,
+     uint32_t idx)
+ {
+         kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
+             sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
+             0,
+             rfs4_dbe_kmem_constructor,
+             rfs4_dbe_kmem_destructor,
+             NULL,
+             NULL,
+             NULL,
+             0);
+ 
+         /*
+          * Bound the copy by the size of the destination rather than by
+          * the length of the source, so that an over-long cache name is
+          * truncated instead of overrunning r_db_name.
+          *
+          * NOTE(review): this assumes r_db_name is a fixed-size char array
+          * member (not a pointer); confirm against its declaration.
+          */
+         (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
+             sizeof (rfs4_db_mem_cache_table[idx].r_db_name));
+         rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
+         return (mem_cache);
+ }
+ 
  rfs4_table_t *
  rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
      uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
      void (*destroy)(rfs4_entry_t),
      bool_t (*expiry)(rfs4_entry_t),
*** 302,320 ****
          table->dbt_maxentries = maxentries;
          table->dbt_create = create;
          table->dbt_destroy = destroy;
          table->dbt_expiry = expiry;
  
!         table->dbt_mem_cache = kmem_cache_create(cache_name,
!             sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
!             0,
!             rfs4_dbe_kmem_constructor,
!             rfs4_dbe_kmem_destructor,
!             NULL,
!             table,
!             NULL,
!             0);
          kmem_free(cache_name, len+13);
  
          table->dbt_debug = db->db_debug_flags;
  
          mutex_enter(db->db_lock);
--- 370,384 ----
          table->dbt_maxentries = maxentries;
          table->dbt_create = create;
          table->dbt_destroy = destroy;
          table->dbt_expiry = expiry;
  
!         /*
!          * get the correct kmem_cache for this table type based on the name.
!          */
!         table->dbt_mem_cache = get_db_mem_cache(cache_name);
! 
          kmem_free(cache_name, len+13);
  
          table->dbt_debug = db->db_debug_flags;
  
          mutex_enter(db->db_lock);
*** 362,372 ****
          cv_destroy(&table->dbt_reaper_wait);
  
          kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
          if (table->dbt_id_space)
                  id_space_destroy(table->dbt_id_space);
!         kmem_cache_destroy(table->dbt_mem_cache);
          kmem_free(table, sizeof (rfs4_table_t));
  }
  
  rfs4_index_t *
  rfs4_index_create(rfs4_table_t *table, char *keyname,
--- 426,436 ----
          cv_destroy(&table->dbt_reaper_wait);
  
          kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
          if (table->dbt_id_space)
                  id_space_destroy(table->dbt_id_space);
!         table->dbt_mem_cache = NULL;
          kmem_free(table, sizeof (rfs4_table_t));
  }
  
  rfs4_index_t *
  rfs4_index_create(rfs4_table_t *table, char *keyname,
*** 681,696 ****
  
  /*ARGSUSED*/
  boolean_t
  rfs4_cpr_callb(void *arg, int code)
  {
-         rfs4_table_t *table = rfs4_client_tab;
          rfs4_bucket_t *buckets, *bp;
          rfs4_link_t *l;
          rfs4_client_t *cp;
          int i;
  
          /*
           * We get called for Suspend and Resume events.
           * For the suspend case we simply don't care!  Nor do we care if
           * there are no clients.
           */
--- 745,762 ----
  
  /*ARGSUSED*/
  boolean_t
  rfs4_cpr_callb(void *arg, int code)
  {
          rfs4_bucket_t *buckets, *bp;
          rfs4_link_t *l;
          rfs4_client_t *cp;
          int i;
  
+         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
+         rfs4_table_t *table = nsrv4->rfs4_client_tab;
+ 
          /*
           * We get called for Suspend and Resume events.
           * For the suspend case we simply don't care!  Nor do we care if
           * there are no clients.
           */
*** 773,801 ****
              (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
              desired, cache_time, table->dbt_name));
  
          /* Walk the buckets looking for entries to release/destroy */
          for (i = 0; i < table->dbt_len; i++) {
                  bp = &buckets[i];
                  do {
                          found = FALSE;
                          rw_enter(bp->dbk_lock, RW_READER);
                          for (l = bp->dbk_head; l; l = l->next) {
                                  entry = l->entry;
                                  /*
                                   * Examine an entry.  Ref count of 1 means
                                   * that the only reference is for the hash
                                   * table reference.
                                   */
!                                 if (entry->dbe_refcnt != 1)
                                          continue;
!                                 mutex_enter(entry->dbe_lock);
                                  if ((entry->dbe_refcnt == 1) &&
                                      (table->dbt_reaper_shutdown ||
                                      table->dbt_expiry == NULL ||
                                      (*table->dbt_expiry)(entry->dbe_data))) {
!                                         entry->dbe_refcnt--;
                                          count++;
                                          found = TRUE;
                                  }
                                  mutex_exit(entry->dbe_lock);
                          }
--- 839,874 ----
              (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
              desired, cache_time, table->dbt_name));
  
          /* Walk the buckets looking for entries to release/destroy */
          for (i = 0; i < table->dbt_len; i++) {
+                 int retries = 0;
                  bp = &buckets[i];
                  do {
                          found = FALSE;
                          rw_enter(bp->dbk_lock, RW_READER);
                          for (l = bp->dbk_head; l; l = l->next) {
                                  entry = l->entry;
+                                 mutex_enter(entry->dbe_lock);
+                                 ASSERT(entry->dbe_refcnt != 0);
                                  /*
                                   * Examine an entry.  Ref count of 1 means
                                   * that the only reference is for the hash
                                   * table reference.
                                   */
!                                 if (entry->dbe_refcnt != 1) {
! #ifdef DEBUG
!                                         rfs4_dbe_debug(entry);
! #endif
!                                         mutex_exit(entry->dbe_lock);
                                          continue;
!                                 }
                                  if ((entry->dbe_refcnt == 1) &&
                                      (table->dbt_reaper_shutdown ||
                                      table->dbt_expiry == NULL ||
                                      (*table->dbt_expiry)(entry->dbe_data))) {
!                                         rfs4_dbe_rele_nolock(entry);
                                          count++;
                                          found = TRUE;
                                  }
                                  mutex_exit(entry->dbe_lock);
                          }
*** 808,826 ****
                                  l = bp->dbk_head;
                                  while (l) {
                                          t = l;
                                          entry = t->entry;
                                          l = l->next;
                                          if (entry->dbe_refcnt == 0) {
                                                  DEQUEUE(bp->dbk_head, t);
                                                  t->next = NULL;
                                                  t->prev = NULL;
                                                  INVALIDATE_ADDR(t->entry);
                                                  rfs4_dbe_destroy(entry);
                                          }
                                  }
-                         }
                          rw_exit(bp->dbk_lock);
                          /*
                           * delay slightly if there is more work to do
                           * with the expectation that other reaper
                           * threads are freeing data structures as well
--- 881,902 ----
                                  l = bp->dbk_head;
                                  while (l) {
                                          t = l;
                                          entry = t->entry;
                                          l = l->next;
+                                         mutex_enter(entry->dbe_lock);
                                          if (entry->dbe_refcnt == 0) {
                                                  DEQUEUE(bp->dbk_head, t);
+                                                 mutex_exit(entry->dbe_lock);
                                                  t->next = NULL;
                                                  t->prev = NULL;
                                                  INVALIDATE_ADDR(t->entry);
                                                  rfs4_dbe_destroy(entry);
+                                         } else
+                                                 mutex_exit(entry->dbe_lock);
                                  }
                          }
                          rw_exit(bp->dbk_lock);
                          /*
                           * delay slightly if there is more work to do
                           * with the expectation that other reaper
                           * threads are freeing data structures as well
*** 827,843 ****
                           * and in turn will reduce ref counts on
                           * entries in this table allowing them to be
                           * released.  This is only done in the
                           * instance that the tables are being shut down.
                           */
!                         if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
                                  delay(hz/100);
                  /*
                   * If this is a table shutdown, keep going until
                   * everything is gone
                   */
!                 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
  
                  if (!table->dbt_reaper_shutdown && desired && count >= desired)
                          break;
          }
  
--- 903,921 ----
                           * and in turn will reduce ref counts on
                           * entries in this table allowing them to be
                           * released.  This is only done in the
                           * instance that the tables are being shut down.
                           */
!                         if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) {
                                  delay(hz/100);
+                                 retries++;
+                         }
                  /*
                   * If this is a table shutdown, keep going until
                   * everything is gone
                   */
!                 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL && retries < 5);
  
                  if (!table->dbt_reaper_shutdown && desired && count >= desired)
                          break;
          }
  
*** 877,895 ****
          /* Notify the database shutdown processing that the table is shutdown */
          mutex_enter(table->dbt_db->db_lock);
          table->dbt_db->db_shutdown_count--;
          cv_signal(&table->dbt_db->db_shutdown_wait);
          mutex_exit(table->dbt_db->db_lock);
  }
  
  static void
  rfs4_start_reaper(rfs4_table_t *table)
  {
          if (table->dbt_max_cache_time == 0)
                  return;
  
!         (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
              minclsyspri);
  }
  
  #ifdef DEBUG
  void
--- 955,974 ----
          /* Notify the database shutdown processing that the table is shutdown */
          mutex_enter(table->dbt_db->db_lock);
          table->dbt_db->db_shutdown_count--;
          cv_signal(&table->dbt_db->db_shutdown_wait);
          mutex_exit(table->dbt_db->db_lock);
+         zthread_exit();
  }
  
  static void
  rfs4_start_reaper(rfs4_table_t *table)
  {
!         /*
!          * Start a reaper_thread for this table via zthread_create().
!          * No thread is started when dbt_max_cache_time is zero.
!          */
!         if (table->dbt_max_cache_time != 0) {
!                 (void) zthread_create(NULL, 0, reaper_thread, table, 0,
!                     minclsyspri);
!         }
  }
  
  #ifdef DEBUG
  void