5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

@@ -20,10 +20,11 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/spa_impl.h>
 #include <sys/spa_boot.h>

@@ -549,19 +550,21 @@
         spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
 
         mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
+        mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&spa->spa_iokstat_lock, NULL, MUTEX_DEFAULT, NULL);
 
         cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
+        cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
 
         for (int t = 0; t < TXG_SIZE; t++)

@@ -667,10 +670,11 @@
 {
         spa_config_dirent_t *dp;
 
         ASSERT(MUTEX_HELD(&spa_namespace_lock));
         ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
+        ASSERT3U(refcount_count(&spa->spa_refcount), ==, 0);
 
         nvlist_free(spa->spa_config_splitting);
 
         avl_remove(&spa_namespace_avl, spa);
         cv_broadcast(&spa_namespace_cv);

@@ -708,17 +712,19 @@
 
         for (int t = 0; t < TXG_SIZE; t++)
                 bplist_destroy(&spa->spa_free_bplist[t]);
 
         cv_destroy(&spa->spa_async_cv);
+        cv_destroy(&spa->spa_evicting_os_cv);
         cv_destroy(&spa->spa_proc_cv);
         cv_destroy(&spa->spa_scrub_io_cv);
         cv_destroy(&spa->spa_suspend_cv);
 
         mutex_destroy(&spa->spa_async_lock);
         mutex_destroy(&spa->spa_errlist_lock);
         mutex_destroy(&spa->spa_errlog_lock);
+        mutex_destroy(&spa->spa_evicting_os_lock);
         mutex_destroy(&spa->spa_history_lock);
         mutex_destroy(&spa->spa_proc_lock);
         mutex_destroy(&spa->spa_props_lock);
         mutex_destroy(&spa->spa_scrub_lock);
         mutex_destroy(&spa->spa_suspend_lock);

@@ -772,10 +778,24 @@
             MUTEX_HELD(&spa_namespace_lock));
         (void) refcount_remove(&spa->spa_refcount, tag);
 }
 
 /*
+ * Remove a reference to the given spa_t held by a dsl dir that is
+ * being asynchronously released.  Async releases occur from a taskq
+ * performing eviction of dsl datasets and dirs.  The namespace lock
+ * isn't held and the hold by the object being evicted may contribute to
+ * spa_minref (e.g. dataset or directory released during pool export),
+ * so the asserts in spa_close() do not apply.
+ */
+void
+spa_async_close(spa_t *spa, void *tag)
+{
+        (void) refcount_remove(&spa->spa_refcount, tag);
+}
+
+/*
  * Check to see if the spa refcount is zero.  Must be called with
  * spa_namespace_lock held.  We really compare against spa_minref, which is the
  * number of references acquired when opening a pool
  */
 boolean_t
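
The comment above spells out why spa_close()'s assertion cannot be used from the eviction taskq. Purely as illustration, the kind of callback spa_async_close() is intended for might look like the sketch below; the function name and the elided teardown are hypothetical, and only the final spa_async_close() call reflects what this change adds.

    /*
     * Hypothetical eviction-taskq callback (illustration only): it runs
     * without spa_namespace_lock held, and during pool export its hold may
     * be counted in spa_minref, so it must drop its reference with
     * spa_async_close() rather than spa_close().
     */
    static void
    example_dsl_dir_evict_async(void *dbu)
    {
            dsl_dir_t *dd = dbu;                    /* dsl dir being evicted */
            spa_t *spa = dd->dd_pool->dp_spa;       /* pool the dir belongs to */

            /* ... release the dsl_dir_t's own resources ... */

            spa_async_close(spa, dd);               /* not spa_close(spa, dd) */
    }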

@@ -1667,10 +1687,38 @@
 spa_log_class(spa_t *spa)
 {
         return (spa->spa_log_class);
 }
 
+void
+spa_evicting_os_register(spa_t *spa, objset_t *os)
+{
+        mutex_enter(&spa->spa_evicting_os_lock);
+        list_insert_head(&spa->spa_evicting_os_list, os);
+        mutex_exit(&spa->spa_evicting_os_lock);
+}
+
+void
+spa_evicting_os_deregister(spa_t *spa, objset_t *os)
+{
+        mutex_enter(&spa->spa_evicting_os_lock);
+        list_remove(&spa->spa_evicting_os_list, os);
+        cv_broadcast(&spa->spa_evicting_os_cv);
+        mutex_exit(&spa->spa_evicting_os_lock);
+}
+
+void
+spa_evicting_os_wait(spa_t *spa)
+{
+        mutex_enter(&spa->spa_evicting_os_lock);
+        while (!list_is_empty(&spa->spa_evicting_os_list))
+                cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock);
+        mutex_exit(&spa->spa_evicting_os_lock);
+
+        dmu_buf_user_evict_wait();
+}
+
 int
 spa_max_replication(spa_t *spa)
 {
         /*
          * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
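
The three functions added in the hunk above form a small handshake around asynchronous objset eviction. A minimal sketch of how they are meant to pair up follows; the function names and the surrounding teardown steps are illustrative assumptions, not part of this diff.

    /* Sketch only: illustrative callers of the evicting-objset hooks. */

    /* An objset entering asynchronous teardown tracks itself on its spa. */
    static void
    example_objset_evict_begin(objset_t *os)
    {
            spa_evicting_os_register(os->os_spa, os);
            /* ... kick off asynchronous release of the remaining holds ... */
    }

    /* The completion path removes it and wakes any waiter. */
    static void
    example_objset_evict_done(objset_t *os)
    {
            /* ... final objset teardown ... */
            spa_evicting_os_deregister(os->os_spa, os);
    }

    /* Pool unload/export blocks until every in-flight eviction finishes. */
    static void
    example_pool_teardown(spa_t *spa)
    {
            spa_evicting_os_wait(spa);
            /* ... now safe to continue tearing down the spa_t ... */
    }

Note that spa_evicting_os_wait() also calls dmu_buf_user_evict_wait(), so by the time it returns the per-spa list has drained and pending dbuf user evictions have completed as well.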