Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  25  * Copyright (c) 2013, 2014, Nexenta Systems, Inc.  All rights reserved.

  26  */
  27 
  28 /*
  29  * SPA: Storage Pool Allocator
  30  *
  31  * This file contains all the routines used when modifying on-disk SPA state.
  32  * This includes opening, importing, destroying, exporting a pool, and syncing a
  33  * pool.
  34  */
  35 
  36 #include <sys/zfs_context.h>
  37 #include <sys/fm/fs/zfs.h>
  38 #include <sys/spa_impl.h>
  39 #include <sys/zio.h>
  40 #include <sys/zio_checksum.h>
  41 #include <sys/dmu.h>
  42 #include <sys/dmu_tx.h>
  43 #include <sys/zap.h>
  44 #include <sys/zil.h>
  45 #include <sys/ddt.h>


1071                         ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
1072                         ASSERT(spa->spa_proc != &p0);
1073                         ASSERT(spa->spa_did != 0);
1074                 } else {
1075 #ifdef _KERNEL
1076                         cmn_err(CE_WARN,
1077                             "Couldn't create process for zfs pool \"%s\"\n",
1078                             spa->spa_name);
1079 #endif
1080                 }
1081         }
1082         mutex_exit(&spa->spa_proc_lock);
1083 
1084         /* If we didn't create a process, we need to create our taskqs. */
1085         if (spa->spa_proc == &p0) {
1086                 spa_create_zio_taskqs(spa);
1087         }
1088 
1089         list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
1090             offsetof(vdev_t, vdev_config_dirty_node));


1091         list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
1092             offsetof(vdev_t, vdev_state_dirty_node));
1093 
1094         txg_list_create(&spa->spa_vdev_txg_list,
1095             offsetof(struct vdev, vdev_txg_node));
1096 
1097         avl_create(&spa->spa_errlist_scrub,
1098             spa_error_entry_compare, sizeof (spa_error_entry_t),
1099             offsetof(spa_error_entry_t, se_avl));
1100         avl_create(&spa->spa_errlist_last,
1101             spa_error_entry_compare, sizeof (spa_error_entry_t),
1102             offsetof(spa_error_entry_t, se_avl));
1103 }
1104 
1105 /*
1106  * Opposite of spa_activate().
1107  */
1108 static void
1109 spa_deactivate(spa_t *spa)
1110 {
1111         ASSERT(spa->spa_sync_on == B_FALSE);
1112         ASSERT(spa->spa_dsl_pool == NULL);
1113         ASSERT(spa->spa_root_vdev == NULL);
1114         ASSERT(spa->spa_async_zio_root == NULL);
1115         ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
1116 


1117         txg_list_destroy(&spa->spa_vdev_txg_list);
1118 
1119         list_destroy(&spa->spa_config_dirty_list);

1120         list_destroy(&spa->spa_state_dirty_list);
1121 
1122         for (int t = 0; t < ZIO_TYPES; t++) {
1123                 for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
1124                         spa_taskqs_fini(spa, t, q);
1125                 }
1126         }
1127 
1128         metaslab_class_destroy(spa->spa_normal_class);
1129         spa->spa_normal_class = NULL;
1130 
1131         metaslab_class_destroy(spa->spa_log_class);
1132         spa->spa_log_class = NULL;
1133 
1134         /*
1135          * If this was part of an import or the open otherwise failed, we may
1136          * still have errors left in the queues.  Empty them just in case.
1137          */
1138         spa_errlog_drain(spa);
1139 


2090         if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
2091             spa_guid_exists(pool_guid, 0)) {
2092                 error = SET_ERROR(EEXIST);
2093         } else {
2094                 spa->spa_config_guid = pool_guid;
2095 
2096                 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
2097                     &nvl) == 0) {
2098                         VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting,
2099                             KM_SLEEP) == 0);
2100                 }
2101 
2102                 nvlist_free(spa->spa_load_info);
2103                 spa->spa_load_info = fnvlist_alloc();
2104 
2105                 gethrestime(&spa->spa_loaded_ts);
2106                 error = spa_load_impl(spa, pool_guid, config, state, type,
2107                     mosconfig, &ereport);
2108         }
2109 





2110         spa->spa_minref = refcount_count(&spa->spa_refcount);
2111         if (error) {
2112                 if (error != EEXIST) {
2113                         spa->spa_loaded_ts.tv_sec = 0;
2114                         spa->spa_loaded_ts.tv_nsec = 0;
2115                 }
2116                 if (error != EBADF) {
2117                         zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
2118                 }
2119         }
2120         spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
2121         spa->spa_ena = 0;
2122 
2123         return (error);
2124 }
2125 
2126 /*
2127  * Load an existing storage pool, using the pool's builtin spa_config as a
2128  * source of configuration information.
2129  */


3658         if (props != NULL) {
3659                 spa_configfile_set(spa, props, B_FALSE);
3660                 spa_sync_props(props, tx);
3661         }
3662 
3663         dmu_tx_commit(tx);
3664 
3665         spa->spa_sync_on = B_TRUE;
3666         txg_sync_start(spa->spa_dsl_pool);
3667 
3668         /*
3669          * We explicitly wait for the first transaction to complete so that our
3670          * bean counters are appropriately updated.
3671          */
3672         txg_wait_synced(spa->spa_dsl_pool, txg);
3673 
3674         spa_config_sync(spa, B_FALSE, B_TRUE);
3675 
3676         spa_history_log_version(spa, "create");
3677 





3678         spa->spa_minref = refcount_count(&spa->spa_refcount);
3679 
3680         mutex_exit(&spa_namespace_lock);
3681 
3682         return (0);
3683 }
3684 
3685 #ifdef _KERNEL
3686 /*
3687  * Get the root pool information from the root disk, then import the root pool
3688  * during system boot.
3689  */
3690 extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
3691 
3692 static nvlist_t *
3693 spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
3694 {
3695         nvlist_t *config;
3696         nvlist_t *nvtop, *nvroot;
3697         uint64_t pgid;


4190         /*
4191          * Put a hold on the pool, drop the namespace lock, stop async tasks,
4192          * reacquire the namespace lock, and see if we can export.
4193          */
4194         spa_open_ref(spa, FTAG);
4195         mutex_exit(&spa_namespace_lock);
4196         spa_async_suspend(spa);
4197         mutex_enter(&spa_namespace_lock);
4198         spa_close(spa, FTAG);
4199 
4200         /*
4201          * The pool will be in core if it's openable,
4202          * in which case we can modify its state.
4203          */
4204         if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
4205                 /*
4206                  * Objsets may be open only because they're dirty, so we
4207                  * have to force the pool to sync before checking spa_refcount.
4208                  */
4209                 txg_wait_synced(spa->spa_dsl_pool, 0);

4210 
4211                 /*
4212                  * A pool cannot be exported or destroyed if there are active
4213                  * references.  If we are resetting a pool, allow references by
4214                  * fault injection handlers.
4215                  */
4216                 if (!spa_refcount_zero(spa) ||
4217                     (spa->spa_inject_ref != 0 &&
4218                     new_state != POOL_STATE_UNINITIALIZED)) {
4219                         spa_async_resume(spa);
4220                         mutex_exit(&spa_namespace_lock);
4221                         return (SET_ERROR(EBUSY));
4222                 }
4223 
4224                 /*
4225                  * A pool cannot be exported if it has an active shared spare.
4226                  * This is to prevent other pools stealing the active spare
4227                  * from an exported pool. At the user's own will, such a pool
4228                  * can be forcibly exported.
4229                  */




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  25  * Copyright (c) 2013, 2014, Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  27  */
  28 
  29 /*
  30  * SPA: Storage Pool Allocator
  31  *
  32  * This file contains all the routines used when modifying on-disk SPA state.
  33  * This includes opening, importing, destroying, exporting a pool, and syncing a
  34  * pool.
  35  */
  36 
  37 #include <sys/zfs_context.h>
  38 #include <sys/fm/fs/zfs.h>
  39 #include <sys/spa_impl.h>
  40 #include <sys/zio.h>
  41 #include <sys/zio_checksum.h>
  42 #include <sys/dmu.h>
  43 #include <sys/dmu_tx.h>
  44 #include <sys/zap.h>
  45 #include <sys/zil.h>
  46 #include <sys/ddt.h>


1072                         ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
1073                         ASSERT(spa->spa_proc != &p0);
1074                         ASSERT(spa->spa_did != 0);
1075                 } else {
1076 #ifdef _KERNEL
1077                         cmn_err(CE_WARN,
1078                             "Couldn't create process for zfs pool \"%s\"\n",
1079                             spa->spa_name);
1080 #endif
1081                 }
1082         }
1083         mutex_exit(&spa->spa_proc_lock);
1084 
1085         /* If we didn't create a process, we need to create our taskqs. */
1086         if (spa->spa_proc == &p0) {
1087                 spa_create_zio_taskqs(spa);
1088         }
1089 
1090         list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
1091             offsetof(vdev_t, vdev_config_dirty_node));
1092         list_create(&spa->spa_evicting_os_list, sizeof (objset_t),
1093             offsetof(objset_t, os_evicting_node));
1094         list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
1095             offsetof(vdev_t, vdev_state_dirty_node));
1096 
1097         txg_list_create(&spa->spa_vdev_txg_list,
1098             offsetof(struct vdev, vdev_txg_node));
1099 
1100         avl_create(&spa->spa_errlist_scrub,
1101             spa_error_entry_compare, sizeof (spa_error_entry_t),
1102             offsetof(spa_error_entry_t, se_avl));
1103         avl_create(&spa->spa_errlist_last,
1104             spa_error_entry_compare, sizeof (spa_error_entry_t),
1105             offsetof(spa_error_entry_t, se_avl));
1106 }
1107 
1108 /*
1109  * Opposite of spa_activate().
1110  */
1111 static void
1112 spa_deactivate(spa_t *spa)
1113 {
1114         ASSERT(spa->spa_sync_on == B_FALSE);
1115         ASSERT(spa->spa_dsl_pool == NULL);
1116         ASSERT(spa->spa_root_vdev == NULL);
1117         ASSERT(spa->spa_async_zio_root == NULL);
1118         ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
1119 
1120         spa_evicting_os_wait(spa);
1121 
1122         txg_list_destroy(&spa->spa_vdev_txg_list);
1123 
1124         list_destroy(&spa->spa_config_dirty_list);
1125         list_destroy(&spa->spa_evicting_os_list);
1126         list_destroy(&spa->spa_state_dirty_list);
1127 
1128         for (int t = 0; t < ZIO_TYPES; t++) {
1129                 for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
1130                         spa_taskqs_fini(spa, t, q);
1131                 }
1132         }
1133 
1134         metaslab_class_destroy(spa->spa_normal_class);
1135         spa->spa_normal_class = NULL;
1136 
1137         metaslab_class_destroy(spa->spa_log_class);
1138         spa->spa_log_class = NULL;
1139 
1140         /*
1141          * If this was part of an import or the open otherwise failed, we may
1142          * still have errors left in the queues.  Empty them just in case.
1143          */
1144         spa_errlog_drain(spa);
1145 


2096         if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
2097             spa_guid_exists(pool_guid, 0)) {
2098                 error = SET_ERROR(EEXIST);
2099         } else {
2100                 spa->spa_config_guid = pool_guid;
2101 
2102                 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
2103                     &nvl) == 0) {
2104                         VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting,
2105                             KM_SLEEP) == 0);
2106                 }
2107 
2108                 nvlist_free(spa->spa_load_info);
2109                 spa->spa_load_info = fnvlist_alloc();
2110 
2111                 gethrestime(&spa->spa_loaded_ts);
2112                 error = spa_load_impl(spa, pool_guid, config, state, type,
2113                     mosconfig, &ereport);
2114         }
2115 
2116         /*
2117          * Don't count references from objsets that are already closed
2118          * and are making their way through the eviction process.
2119          */
2120         spa_evicting_os_wait(spa);
2121         spa->spa_minref = refcount_count(&spa->spa_refcount);
2122         if (error) {
2123                 if (error != EEXIST) {
2124                         spa->spa_loaded_ts.tv_sec = 0;
2125                         spa->spa_loaded_ts.tv_nsec = 0;
2126                 }
2127                 if (error != EBADF) {
2128                         zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
2129                 }
2130         }
2131         spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
2132         spa->spa_ena = 0;
2133 
2134         return (error);
2135 }
2136 
2137 /*
2138  * Load an existing storage pool, using the pool's builtin spa_config as a
2139  * source of configuration information.
2140  */


3669         if (props != NULL) {
3670                 spa_configfile_set(spa, props, B_FALSE);
3671                 spa_sync_props(props, tx);
3672         }
3673 
3674         dmu_tx_commit(tx);
3675 
3676         spa->spa_sync_on = B_TRUE;
3677         txg_sync_start(spa->spa_dsl_pool);
3678 
3679         /*
3680          * We explicitly wait for the first transaction to complete so that our
3681          * bean counters are appropriately updated.
3682          */
3683         txg_wait_synced(spa->spa_dsl_pool, txg);
3684 
3685         spa_config_sync(spa, B_FALSE, B_TRUE);
3686 
3687         spa_history_log_version(spa, "create");
3688 
3689         /*
3690          * Don't count references from objsets that are already closed
3691          * and are making their way through the eviction process.
3692          */
3693         spa_evicting_os_wait(spa);
3694         spa->spa_minref = refcount_count(&spa->spa_refcount);
3695 
3696         mutex_exit(&spa_namespace_lock);
3697 
3698         return (0);
3699 }
3700 
3701 #ifdef _KERNEL
3702 /*
3703  * Get the root pool information from the root disk, then import the root pool
3704  * during system boot.
3705  */
3706 extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
3707 
3708 static nvlist_t *
3709 spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
3710 {
3711         nvlist_t *config;
3712         nvlist_t *nvtop, *nvroot;
3713         uint64_t pgid;


4206         /*
4207          * Put a hold on the pool, drop the namespace lock, stop async tasks,
4208          * reacquire the namespace lock, and see if we can export.
4209          */
4210         spa_open_ref(spa, FTAG);
4211         mutex_exit(&spa_namespace_lock);
4212         spa_async_suspend(spa);
4213         mutex_enter(&spa_namespace_lock);
4214         spa_close(spa, FTAG);
4215 
4216         /*
4217          * The pool will be in core if it's openable,
4218          * in which case we can modify its state.
4219          */
4220         if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
4221                 /*
4222                  * Objsets may be open only because they're dirty, so we
4223                  * have to force the pool to sync before checking spa_refcount.
4224                  */
4225                 txg_wait_synced(spa->spa_dsl_pool, 0);
4226                 spa_evicting_os_wait(spa);
4227 
4228                 /*
4229                  * A pool cannot be exported or destroyed if there are active
4230                  * references.  If we are resetting a pool, allow references by
4231                  * fault injection handlers.
4232                  */
4233                 if (!spa_refcount_zero(spa) ||
4234                     (spa->spa_inject_ref != 0 &&
4235                     new_state != POOL_STATE_UNINITIALIZED)) {
4236                         spa_async_resume(spa);
4237                         mutex_exit(&spa_namespace_lock);
4238                         return (SET_ERROR(EBUSY));
4239                 }
4240 
4241                 /*
4242                  * A pool cannot be exported if it has an active shared spare.
4243                  * This is to prevent other pools stealing the active spare
4244                  * from an exported pool. At the user's own will, such a pool
4245                  * can be forcibly exported.
4246                  */