Print this page
10592 misc. metaslab and vdev related ZoL bug fixes
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>


 229 
 230         /*
 231          * Top-level vdev state.
 232          */
 233         uint64_t        vdev_ms_array;  /* metaslab array object        */
 234         uint64_t        vdev_ms_shift;  /* metaslab size shift          */
 235         uint64_t        vdev_ms_count;  /* number of metaslabs          */
 236         metaslab_group_t *vdev_mg;      /* metaslab group               */
 237         metaslab_t      **vdev_ms;      /* metaslab array               */
 238         txg_list_t      vdev_ms_list;   /* per-txg dirty metaslab lists */
 239         txg_list_t      vdev_dtl_list;  /* per-txg dirty DTL lists      */
 240         txg_node_t      vdev_txg_node;  /* per-txg dirty vdev linkage   */
 241         boolean_t       vdev_remove_wanted; /* async remove wanted?     */
 242         boolean_t       vdev_probe_wanted; /* async probe wanted?       */
 243         list_node_t     vdev_config_dirty_node; /* config dirty list    */
 244         list_node_t     vdev_state_dirty_node; /* state dirty list      */
 245         uint64_t        vdev_deflate_ratio; /* deflation ratio (x512)   */
 246         uint64_t        vdev_islog;     /* is an intent log device      */
 247         uint64_t        vdev_removing;  /* device is being removed?     */
 248         boolean_t       vdev_ishole;    /* is a hole in the namespace   */
 249         kmutex_t        vdev_queue_lock; /* protects vdev_queue_depth   */
 250         uint64_t        vdev_top_zap;
 251         vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias  */
 252 
 253         /* pool checkpoint related */
 254         space_map_t     *vdev_checkpoint_sm;    /* contains reserved blocks */
 255 
 256         boolean_t       vdev_initialize_exit_wanted;
 257         vdev_initializing_state_t       vdev_initialize_state;
 258         kthread_t       *vdev_initialize_thread;
 259         /* Protects vdev_initialize_thread and vdev_initialize_state. */
 260         kmutex_t        vdev_initialize_lock;
 261         kcondvar_t      vdev_initialize_cv;
 262         uint64_t        vdev_initialize_offset[TXG_SIZE];
 263         uint64_t        vdev_initialize_last_offset;
 264         range_tree_t    *vdev_initialize_tree;  /* valid while initializing */
 265         uint64_t        vdev_initialize_bytes_est;
 266         uint64_t        vdev_initialize_bytes_done;
 267         time_t          vdev_initialize_action_time;    /* start and end time */
 268 
 269         /* for limiting outstanding I/Os */


 289 
 290         /*
 291          * In memory data structures used to manage the obsolete sm, for
 292          * indirect or removing vdevs.
 293          *
 294          * The vdev_obsolete_segments is the in-core record of the segments
 295          * that are no longer referenced anywhere in the pool (due to
 296          * being freed or remapped and not referenced by any snapshots).
 297          * During a sync, segments are added to vdev_obsolete_segments
 298          * via vdev_indirect_mark_obsolete(); at the end of each sync
 299          * pass, this is appended to vdev_obsolete_sm via
 300          * vdev_indirect_sync_obsolete().  The vdev_obsolete_lock
 301          * protects against concurrent modifications of vdev_obsolete_segments
 302          * from multiple zio threads.
 303          */
 304         kmutex_t        vdev_obsolete_lock;
 305         range_tree_t    *vdev_obsolete_segments;
 306         space_map_t     *vdev_obsolete_sm;
 307 
 308         /*
 309          * The queue depth parameters determine how many async writes are
 310          * still pending (i.e. allocated but not yet issued to disk) per
 311          * top-level vdev (vdev_async_write_queue_depth) and the maximum
 312          * allowed (vdev_max_async_write_queue_depth). These values only
 313          * apply to top-level vdevs.
 314          */
 315         uint64_t        vdev_async_write_queue_depth;
 316         uint64_t        vdev_max_async_write_queue_depth;
 317 
 318         /*
 319          * Leaf vdev state.
 320          */
 321         range_tree_t    *vdev_dtl[DTL_TYPES]; /* dirty time logs        */
 322         space_map_t     *vdev_dtl_sm;   /* dirty time log space map     */
 323         txg_node_t      vdev_dtl_node;  /* per-txg dirty DTL linkage    */
 324         uint64_t        vdev_dtl_object; /* DTL object                  */
 325         uint64_t        vdev_psize;     /* physical device capacity     */
 326         uint64_t        vdev_wholedisk; /* true if this is a whole disk */
 327         uint64_t        vdev_offline;   /* persistent offline state     */
 328         uint64_t        vdev_faulted;   /* persistent faulted state     */
 329         uint64_t        vdev_degraded;  /* persistent degraded state    */
 330         uint64_t        vdev_removed;   /* persistent removed state     */
 331         uint64_t        vdev_resilver_txg; /* persistent resilvering state */
 332         uint64_t        vdev_nparity;   /* number of parity devices for raidz */
 333         char            *vdev_path;     /* vdev path (if any)           */
 334         char            *vdev_devid;    /* vdev devid (if any)          */
 335         char            *vdev_physpath; /* vdev device path (if any)    */
 336         char            *vdev_fru;      /* physical FRU location        */
 337         uint64_t        vdev_not_present; /* not present during import  */
 338         uint64_t        vdev_unspare;   /* unspare when resilvering done */




 229 
 230         /*
 231          * Top-level vdev state.
 232          */
 233         uint64_t        vdev_ms_array;  /* metaslab array object        */
 234         uint64_t        vdev_ms_shift;  /* metaslab size shift          */
 235         uint64_t        vdev_ms_count;  /* number of metaslabs          */
 236         metaslab_group_t *vdev_mg;      /* metaslab group               */
 237         metaslab_t      **vdev_ms;      /* metaslab array               */
 238         txg_list_t      vdev_ms_list;   /* per-txg dirty metaslab lists */
 239         txg_list_t      vdev_dtl_list;  /* per-txg dirty DTL lists      */
 240         txg_node_t      vdev_txg_node;  /* per-txg dirty vdev linkage   */
 241         boolean_t       vdev_remove_wanted; /* async remove wanted?     */
 242         boolean_t       vdev_probe_wanted; /* async probe wanted?       */
 243         list_node_t     vdev_config_dirty_node; /* config dirty list    */
 244         list_node_t     vdev_state_dirty_node; /* state dirty list      */
 245         uint64_t        vdev_deflate_ratio; /* deflation ratio (x512)   */
 246         uint64_t        vdev_islog;     /* is an intent log device      */
 247         uint64_t        vdev_removing;  /* device is being removed?     */
 248         boolean_t       vdev_ishole;    /* is a hole in the namespace   */

 249         uint64_t        vdev_top_zap;
 250         vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias  */
 251 
 252         /* pool checkpoint related */
 253         space_map_t     *vdev_checkpoint_sm;    /* contains reserved blocks */
 254 
 255         boolean_t       vdev_initialize_exit_wanted;
 256         vdev_initializing_state_t       vdev_initialize_state;
 257         kthread_t       *vdev_initialize_thread;
 258         /* Protects vdev_initialize_thread and vdev_initialize_state. */
 259         kmutex_t        vdev_initialize_lock;
 260         kcondvar_t      vdev_initialize_cv;
 261         uint64_t        vdev_initialize_offset[TXG_SIZE];
 262         uint64_t        vdev_initialize_last_offset;
 263         range_tree_t    *vdev_initialize_tree;  /* valid while initializing */
 264         uint64_t        vdev_initialize_bytes_est;
 265         uint64_t        vdev_initialize_bytes_done;
 266         time_t          vdev_initialize_action_time;    /* start and end time */
 267 
 268         /* for limiting outstanding I/Os */


 288 
 289         /*
 290          * In memory data structures used to manage the obsolete sm, for
 291          * indirect or removing vdevs.
 292          *
 293          * The vdev_obsolete_segments is the in-core record of the segments
 294          * that are no longer referenced anywhere in the pool (due to
 295          * being freed or remapped and not referenced by any snapshots).
 296          * During a sync, segments are added to vdev_obsolete_segments
 297          * via vdev_indirect_mark_obsolete(); at the end of each sync
 298          * pass, this is appended to vdev_obsolete_sm via
 299          * vdev_indirect_sync_obsolete().  The vdev_obsolete_lock
 300          * protects against concurrent modifications of vdev_obsolete_segments
 301          * from multiple zio threads.
 302          */
 303         kmutex_t        vdev_obsolete_lock;
 304         range_tree_t    *vdev_obsolete_segments;
 305         space_map_t     *vdev_obsolete_sm;
 306 
 307         /*










 308          * Leaf vdev state.
 309          */
 310         range_tree_t    *vdev_dtl[DTL_TYPES]; /* dirty time logs        */
 311         space_map_t     *vdev_dtl_sm;   /* dirty time log space map     */
 312         txg_node_t      vdev_dtl_node;  /* per-txg dirty DTL linkage    */
 313         uint64_t        vdev_dtl_object; /* DTL object                  */
 314         uint64_t        vdev_psize;     /* physical device capacity     */
 315         uint64_t        vdev_wholedisk; /* true if this is a whole disk */
 316         uint64_t        vdev_offline;   /* persistent offline state     */
 317         uint64_t        vdev_faulted;   /* persistent faulted state     */
 318         uint64_t        vdev_degraded;  /* persistent degraded state    */
 319         uint64_t        vdev_removed;   /* persistent removed state     */
 320         uint64_t        vdev_resilver_txg; /* persistent resilvering state */
 321         uint64_t        vdev_nparity;   /* number of parity devices for raidz */
 322         char            *vdev_path;     /* vdev path (if any)           */
 323         char            *vdev_devid;    /* vdev devid (if any)          */
 324         char            *vdev_physpath; /* vdev device path (if any)    */
 325         char            *vdev_fru;      /* physical FRU location        */
 326         uint64_t        vdev_not_present; /* not present during import  */
 327         uint64_t        vdev_unspare;   /* unspare when resilvering done */