Print this page
10592 misc. metaslab and vdev related ZoL bug fixes
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>

@@ -499,11 +499,10 @@
 
         list_link_init(&vd->vdev_leaf_node);
         mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
-        mutex_init(&vd->vdev_queue_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&vd->vdev_initialize_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&vd->vdev_initialize_io_lock, NULL, MUTEX_DEFAULT, NULL);
         cv_init(&vd->vdev_initialize_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&vd->vdev_initialize_io_cv, NULL, CV_DEFAULT, NULL);
 

@@ -887,11 +886,10 @@
         }
         range_tree_destroy(vd->vdev_obsolete_segments);
         rw_destroy(&vd->vdev_indirect_rwlock);
         mutex_destroy(&vd->vdev_obsolete_lock);
 
-        mutex_destroy(&vd->vdev_queue_lock);
         mutex_destroy(&vd->vdev_dtl_lock);
         mutex_destroy(&vd->vdev_stat_lock);
         mutex_destroy(&vd->vdev_probe_lock);
         mutex_destroy(&vd->vdev_initialize_lock);
         mutex_destroy(&vd->vdev_initialize_io_lock);

@@ -1249,23 +1247,26 @@
                  */
                 vd->vdev_checkpoint_sm = NULL;
         }
 
         if (vd->vdev_ms != NULL) {
-                uint64_t count = vd->vdev_ms_count;
+                metaslab_group_t *mg = vd->vdev_mg;
+                metaslab_group_passivate(mg);
 
-                metaslab_group_passivate(vd->vdev_mg);
+                uint64_t count = vd->vdev_ms_count;
                 for (uint64_t m = 0; m < count; m++) {
                         metaslab_t *msp = vd->vdev_ms[m];
-
                         if (msp != NULL)
                                 metaslab_fini(msp);
                 }
                 kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *));
                 vd->vdev_ms = NULL;
 
                 vd->vdev_ms_count = 0;
+
+                for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
+                        ASSERT0(mg->mg_histogram[i]);
         }
         ASSERT0(vd->vdev_ms_count);
 }
 
 typedef struct vdev_probe_stats {

@@ -2547,17 +2548,10 @@
                 if (error)
                         return (error);
                 ASSERT(vd->vdev_dtl_sm != NULL);
 
                 mutex_enter(&vd->vdev_dtl_lock);
-
-                /*
-                 * Now that we've opened the space_map we need to update
-                 * the in-core DTL.
-                 */
-                space_map_update(vd->vdev_dtl_sm);
-
                 error = space_map_load(vd->vdev_dtl_sm,
                     vd->vdev_dtl[DTL_MISSING], SM_ALLOC);
                 mutex_exit(&vd->vdev_dtl_lock);
 
                 return (error);

@@ -2713,14 +2707,10 @@
                     (u_longlong_t)space_map_object(vd->vdev_dtl_sm));
                 vdev_config_dirty(vd->vdev_top);
         }
 
         dmu_tx_commit(tx);
-
-        mutex_enter(&vd->vdev_dtl_lock);
-        space_map_update(vd->vdev_dtl_sm);
-        mutex_exit(&vd->vdev_dtl_lock);
 }
 
 /*
  * Determine whether the specified vdev can be offlined/detached/removed
  * without losing data.

@@ -2859,11 +2849,14 @@
                             VDEV_AUX_CORRUPT_DATA);
                         vdev_dbgmsg(vd, "vdev_load: invalid size. ashift=%llu, "
                             "asize=%llu", (u_longlong_t)vd->vdev_ashift,
                             (u_longlong_t)vd->vdev_asize);
                         return (SET_ERROR(ENXIO));
-                } else if ((error = vdev_metaslab_init(vd, 0)) != 0) {
+                }
+
+                error = vdev_metaslab_init(vd, 0);
+                if (error != 0) {
                         vdev_dbgmsg(vd, "vdev_load: metaslab_init failed "
                             "[error=%d]", error);
                         vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
                             VDEV_AUX_CORRUPT_DATA);
                         return (error);

@@ -2873,29 +2866,30 @@
                 if (checkpoint_sm_obj != 0) {
                         objset_t *mos = spa_meta_objset(vd->vdev_spa);
                         ASSERT(vd->vdev_asize != 0);
                         ASSERT3P(vd->vdev_checkpoint_sm, ==, NULL);
 
-                        if ((error = space_map_open(&vd->vdev_checkpoint_sm,
+                        error = space_map_open(&vd->vdev_checkpoint_sm,
                             mos, checkpoint_sm_obj, 0, vd->vdev_asize,
-                            vd->vdev_ashift))) {
+                            vd->vdev_ashift);
+                        if (error != 0) {
                                 vdev_dbgmsg(vd, "vdev_load: space_map_open "
                                     "failed for checkpoint spacemap (obj %llu) "
                                     "[error=%d]",
                                     (u_longlong_t)checkpoint_sm_obj, error);
                                 return (error);
                         }
                         ASSERT3P(vd->vdev_checkpoint_sm, !=, NULL);
-                        space_map_update(vd->vdev_checkpoint_sm);
 
                         /*
                          * Since the checkpoint_sm contains free entries
-                         * exclusively we can use sm_alloc to indicate the
-                         * culmulative checkpointed space that has been freed.
+                         * exclusively, we can use space_map_allocated() to
+                         * indicate the cumulative checkpointed space that
+                         * has been freed.
                          */
                         vd->vdev_stat.vs_checkpoint_space =
-                            -vd->vdev_checkpoint_sm->sm_alloc;
+                            -space_map_allocated(vd->vdev_checkpoint_sm);
                         vd->vdev_spa->spa_checkpoint_info.sci_dspace +=
                             vd->vdev_stat.vs_checkpoint_space;
                 }
         }
 

@@ -2923,11 +2917,10 @@
                         vdev_dbgmsg(vd, "vdev_load: space_map_open failed for "
                             "obsolete spacemap (obj %llu) [error=%d]",
                             (u_longlong_t)obsolete_sm_object, error);
                         return (error);
                 }
-                space_map_update(vd->vdev_obsolete_sm);
         }
 
         return (0);
 }
 

@@ -3010,51 +3003,10 @@
 
         ASSERT(vd->vdev_islog);
         ASSERT(vd == vd->vdev_top);
         ASSERT3U(txg, ==, spa_syncing_txg(spa));
 
-        if (vd->vdev_ms != NULL) {
-                metaslab_group_t *mg = vd->vdev_mg;
-
-                metaslab_group_histogram_verify(mg);
-                metaslab_class_histogram_verify(mg->mg_class);
-
-                for (int m = 0; m < vd->vdev_ms_count; m++) {
-                        metaslab_t *msp = vd->vdev_ms[m];
-
-                        if (msp == NULL || msp->ms_sm == NULL)
-                                continue;
-
-                        mutex_enter(&msp->ms_lock);
-                        /*
-                         * If the metaslab was not loaded when the vdev
-                         * was removed then the histogram accounting may
-                         * not be accurate. Update the histogram information
-                         * here so that we ensure that the metaslab group
-                         * and metaslab class are up-to-date.
-                         */
-                        metaslab_group_histogram_remove(mg, msp);
-
-                        VERIFY0(space_map_allocated(msp->ms_sm));
-                        space_map_close(msp->ms_sm);
-                        msp->ms_sm = NULL;
-                        mutex_exit(&msp->ms_lock);
-                }
-
-                if (vd->vdev_checkpoint_sm != NULL) {
-                        ASSERT(spa_has_checkpoint(spa));
-                        space_map_close(vd->vdev_checkpoint_sm);
-                        vd->vdev_checkpoint_sm = NULL;
-                }
-
-                metaslab_group_histogram_verify(mg);
-                metaslab_class_histogram_verify(mg->mg_class);
-
-                for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
-                        ASSERT0(mg->mg_histogram[i]);
-        }
-
         dmu_tx_t *tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
 
         vdev_destroy_spacemaps(vd, tx);
         if (vd->vdev_top_zap != 0) {
                 vdev_destroy_unlink_zap(vd, vd->vdev_top_zap, tx);

@@ -3084,29 +3036,27 @@
 vdev_sync(vdev_t *vd, uint64_t txg)
 {
         spa_t *spa = vd->vdev_spa;
         vdev_t *lvd;
         metaslab_t *msp;
-        dmu_tx_t *tx;
 
+        ASSERT3U(txg, ==, spa->spa_syncing_txg);
+        dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
         if (range_tree_space(vd->vdev_obsolete_segments) > 0) {
-                dmu_tx_t *tx;
-
                 ASSERT(vd->vdev_removing ||
                     vd->vdev_ops == &vdev_indirect_ops);
 
-                tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
                 vdev_indirect_sync_obsolete(vd, tx);
-                dmu_tx_commit(tx);
 
                 /*
                  * If the vdev is indirect, it can't have dirty
                  * metaslabs or DTLs.
                  */
                 if (vd->vdev_ops == &vdev_indirect_ops) {
                         ASSERT(txg_list_empty(&vd->vdev_ms_list, txg));
                         ASSERT(txg_list_empty(&vd->vdev_dtl_list, txg));
+                        dmu_tx_commit(tx);
                         return;
                 }
         }
 
         ASSERT(vdev_is_concrete(vd));

@@ -3113,16 +3063,14 @@
 
         if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0 &&
             !vd->vdev_removing) {
                 ASSERT(vd == vd->vdev_top);
                 ASSERT0(vd->vdev_indirect_config.vic_mapping_object);
-                tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
                 vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset,
                     DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx);
                 ASSERT(vd->vdev_ms_array != 0);
                 vdev_config_dirty(vd);
-                dmu_tx_commit(tx);
         }
 
         while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) {
                 metaslab_sync(msp, txg);
                 (void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg));

@@ -3137,10 +3085,11 @@
          */
         if (vd->vdev_islog && vd->vdev_stat.vs_alloc == 0 && vd->vdev_removing)
                 vdev_remove_empty_log(vd, txg);
 
         (void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg));
+        dmu_tx_commit(tx);
 }
 
 uint64_t
 vdev_psize_to_asize(vdev_t *vd, uint64_t psize)
 {

@@ -3366,12 +3315,10 @@
                          * If the log device was successfully reset but has
                          * checkpointed data, do not offline it.
                          */
                         if (error == 0 &&
                             tvd->vdev_checkpoint_sm != NULL) {
-                                ASSERT3U(tvd->vdev_checkpoint_sm->sm_alloc,
-                                    !=, 0);
                                 error = ZFS_ERR_CHECKPOINT_EXISTS;
                         }
 
                         spa_vdev_state_enter(spa, SCL_ALLOC);