5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  24  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  25  */
  26 
  27 #include <sys/zfs_context.h>
  28 #include <sys/spa_impl.h>
  29 #include <sys/spa_boot.h>
  30 #include <sys/zio.h>
  31 #include <sys/zio_checksum.h>
  32 #include <sys/zio_compress.h>
  33 #include <sys/dmu.h>
  34 #include <sys/dmu_tx.h>
  35 #include <sys/zap.h>
  36 #include <sys/zil.h>
  37 #include <sys/vdev_impl.h>
  38 #include <sys/metaslab.h>
  39 #include <sys/uberblock_impl.h>
  40 #include <sys/txg.h>
  41 #include <sys/avl.h>
  42 #include <sys/unique.h>
  43 #include <sys/dsl_pool.h>
  44 #include <sys/dsl_dir.h>


 534 /*
 535  * Create an uninitialized spa_t with the given name.  Requires
 536  * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
 537  * exist by calling spa_lookup() first.
 538  */
 539 spa_t *
 540 spa_add(const char *name, nvlist_t *config, const char *altroot)
 541 {
 542         spa_t *spa;
 543         spa_config_dirent_t *dp;
 544         cyc_handler_t hdlr;
 545         cyc_time_t when;
 546 
 547         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 548 
 549         spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
 550 
 551         mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
 552         mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
 553         mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
 554         mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
 555         mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
 556         mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
 557         mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
 558         mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
 559         mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
 560         mutex_init(&spa->spa_iokstat_lock, NULL, MUTEX_DEFAULT, NULL);
 561 
 562         cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
 563         cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
 564         cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
 565         cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
 566 
 567         for (int t = 0; t < TXG_SIZE; t++)
 568                 bplist_create(&spa->spa_free_bplist[t]);
 569 
 570         (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
 571         spa->spa_state = POOL_STATE_UNINITIALIZED;
 572         spa->spa_freeze_txg = UINT64_MAX;
 573         spa->spa_final_txg = UINT64_MAX;
 574         spa->spa_load_max_txg = UINT64_MAX;
 575         spa->spa_proc = &p0;
 576         spa->spa_proc_state = SPA_PROC_NONE;
 577 
 578         hdlr.cyh_func = spa_deadman;
 579         hdlr.cyh_arg = spa;
 580         hdlr.cyh_level = CY_LOW_LEVEL;
 581 
 582         spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);


 652          */
 653         for (int i = 0; i < SPA_FEATURES; i++) {
 654                 spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED;
 655         }
 656 
 657         return (spa);
 658 }
 659 
 660 /*
 661  * Removes a spa_t from the namespace, freeing up any memory used.  Requires
 662  * spa_namespace_lock.  This is called only after the spa_t has been closed and
 663  * deactivated.
 664  */
 665 void
 666 spa_remove(spa_t *spa)
 667 {
 668         spa_config_dirent_t *dp;
 669 
 670         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 671         ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
 672 
 673         nvlist_free(spa->spa_config_splitting);
 674 
 675         avl_remove(&spa_namespace_avl, spa);
 676         cv_broadcast(&spa_namespace_cv);
 677 
 678         if (spa->spa_root) {
 679                 spa_strfree(spa->spa_root);
 680                 spa_active_count--;
 681         }
 682 
 683         while ((dp = list_head(&spa->spa_config_list)) != NULL) {
 684                 list_remove(&spa->spa_config_list, dp);
 685                 if (dp->scd_path != NULL)
 686                         spa_strfree(dp->scd_path);
 687                 kmem_free(dp, sizeof (spa_config_dirent_t));
 688         }
 689 
 690         list_destroy(&spa->spa_config_list);
 691 


 693         nvlist_free(spa->spa_load_info);
 694         spa_config_set(spa, NULL);
 695 
 696         mutex_enter(&cpu_lock);
 697         if (spa->spa_deadman_cycid != CYCLIC_NONE)
 698                 cyclic_remove(spa->spa_deadman_cycid);
 699         mutex_exit(&cpu_lock);
 700         spa->spa_deadman_cycid = CYCLIC_NONE;
 701 
 702         refcount_destroy(&spa->spa_refcount);
 703 
 704         spa_config_lock_destroy(spa);
 705 
 706         kstat_delete(spa->spa_iokstat);
 707         spa->spa_iokstat = NULL;
 708 
 709         for (int t = 0; t < TXG_SIZE; t++)
 710                 bplist_destroy(&spa->spa_free_bplist[t]);
 711 
 712         cv_destroy(&spa->spa_async_cv);
 713         cv_destroy(&spa->spa_proc_cv);
 714         cv_destroy(&spa->spa_scrub_io_cv);
 715         cv_destroy(&spa->spa_suspend_cv);
 716 
 717         mutex_destroy(&spa->spa_async_lock);
 718         mutex_destroy(&spa->spa_errlist_lock);
 719         mutex_destroy(&spa->spa_errlog_lock);
 720         mutex_destroy(&spa->spa_history_lock);
 721         mutex_destroy(&spa->spa_proc_lock);
 722         mutex_destroy(&spa->spa_props_lock);
 723         mutex_destroy(&spa->spa_scrub_lock);
 724         mutex_destroy(&spa->spa_suspend_lock);
 725         mutex_destroy(&spa->spa_vdev_top_lock);
 726         mutex_destroy(&spa->spa_iokstat_lock);
 727 
 728         kmem_free(spa, sizeof (spa_t));
 729 }
 730 
 731 /*
 732  * Given a pool, return the next pool in the namespace, or NULL if there is
 733  * none.  If 'prev' is NULL, return the first pool.
 734  */
 735 spa_t *
 736 spa_next(spa_t *prev)
 737 {
 738         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 739 


 757 spa_open_ref(spa_t *spa, void *tag)
 758 {
 759         ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
 760             MUTEX_HELD(&spa_namespace_lock));
 761         (void) refcount_add(&spa->spa_refcount, tag);
 762 }
 763 
 764 /*
 765  * Remove a reference to the given spa_t.  Must have at least one reference, or
 766  * have the namespace lock held.
 767  */
 768 void
 769 spa_close(spa_t *spa, void *tag)
 770 {
 771         ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref ||
 772             MUTEX_HELD(&spa_namespace_lock));
 773         (void) refcount_remove(&spa->spa_refcount, tag);
 774 }
 775 
 776 /*
 777  * Check to see if the spa refcount is zero.  Must be called with
 778  * spa_namespace_lock held.  We really compare against spa_minref, which is the
 779  * number of references acquired when opening a pool
 780  */
 781 boolean_t
 782 spa_refcount_zero(spa_t *spa)
 783 {
 784         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 785 
 786         return (refcount_count(&spa->spa_refcount) == spa->spa_minref);
 787 }
 788 
 789 /*
 790  * ==========================================================================
 791  * SPA spare and l2cache tracking
 792  * ==========================================================================
 793  */
 794 
 795 /*
 796  * Hot spares and cache devices are tracked using the same code below,


1652 }
1653 
1654 boolean_t
1655 spa_deflate(spa_t *spa)
1656 {
1657         return (spa->spa_deflate);
1658 }
1659 
1660 metaslab_class_t *
1661 spa_normal_class(spa_t *spa)
1662 {
1663         return (spa->spa_normal_class);
1664 }
1665 
1666 metaslab_class_t *
1667 spa_log_class(spa_t *spa)
1668 {
1669         return (spa->spa_log_class);
1670 }
1671 
1672 int
1673 spa_max_replication(spa_t *spa)
1674 {
1675         /*
1676          * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
1677          * handle BPs with more than one DVA allocated.  Set our max
1678          * replication level accordingly.
1679          */
1680         if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
1681                 return (1);
1682         return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
1683 }
1684 
1685 int
1686 spa_prev_software_version(spa_t *spa)
1687 {
1688         return (spa->spa_prev_software_version);
1689 }
1690 
1691 uint64_t




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  24  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  26  */
  27 
  28 #include <sys/zfs_context.h>
  29 #include <sys/spa_impl.h>
  30 #include <sys/spa_boot.h>
  31 #include <sys/zio.h>
  32 #include <sys/zio_checksum.h>
  33 #include <sys/zio_compress.h>
  34 #include <sys/dmu.h>
  35 #include <sys/dmu_tx.h>
  36 #include <sys/zap.h>
  37 #include <sys/zil.h>
  38 #include <sys/vdev_impl.h>
  39 #include <sys/metaslab.h>
  40 #include <sys/uberblock_impl.h>
  41 #include <sys/txg.h>
  42 #include <sys/avl.h>
  43 #include <sys/unique.h>
  44 #include <sys/dsl_pool.h>
  45 #include <sys/dsl_dir.h>


 535 /*
 536  * Create an uninitialized spa_t with the given name.  Requires
 537  * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
 538  * exist by calling spa_lookup() first.
 539  */
 540 spa_t *
 541 spa_add(const char *name, nvlist_t *config, const char *altroot)
 542 {
 543         spa_t *spa;
 544         spa_config_dirent_t *dp;
 545         cyc_handler_t hdlr;
 546         cyc_time_t when;
 547 
 548         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 549 
 550         spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
 551 
 552         mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
 553         mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
 554         mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
 555         mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL);
 556         mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
 557         mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
 558         mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
 559         mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
 560         mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
 561         mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
 562         mutex_init(&spa->spa_iokstat_lock, NULL, MUTEX_DEFAULT, NULL);
 563 
 564         cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
 565         cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
 566         cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
 567         cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
 568         cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
 569 
 570         for (int t = 0; t < TXG_SIZE; t++)
 571                 bplist_create(&spa->spa_free_bplist[t]);
 572 
 573         (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
 574         spa->spa_state = POOL_STATE_UNINITIALIZED;
 575         spa->spa_freeze_txg = UINT64_MAX;
 576         spa->spa_final_txg = UINT64_MAX;
 577         spa->spa_load_max_txg = UINT64_MAX;
 578         spa->spa_proc = &p0;
 579         spa->spa_proc_state = SPA_PROC_NONE;
 580 
 581         hdlr.cyh_func = spa_deadman;
 582         hdlr.cyh_arg = spa;
 583         hdlr.cyh_level = CY_LOW_LEVEL;
 584 
 585         spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);


 655          */
 656         for (int i = 0; i < SPA_FEATURES; i++) {
 657                 spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED;
 658         }
 659 
 660         return (spa);
 661 }
 662 
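As a caller-side illustration of the contract documented above spa_add() (hold spa_namespace_lock and probe with spa_lookup() first), here is a minimal sketch; the pool name "tank" and the config nvlist are placeholders, not part of this change:

/*
 * Sketch only: create the spa_t if no pool of this name exists yet.
 * Assumes the usual spa.h context; 'config' is a previously built
 * nvlist_t (or NULL).
 */
spa_t *spa = NULL;

mutex_enter(&spa_namespace_lock);
if (spa_lookup("tank") == NULL)
        spa = spa_add("tank", config, NULL);
mutex_exit(&spa_namespace_lock);
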
 663 /*
 664  * Removes a spa_t from the namespace, freeing up any memory used.  Requires
 665  * spa_namespace_lock.  This is called only after the spa_t has been closed and
 666  * deactivated.
 667  */
 668 void
 669 spa_remove(spa_t *spa)
 670 {
 671         spa_config_dirent_t *dp;
 672 
 673         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 674         ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
 675         ASSERT3U(refcount_count(&spa->spa_refcount), ==, 0);
 676 
 677         nvlist_free(spa->spa_config_splitting);
 678 
 679         avl_remove(&spa_namespace_avl, spa);
 680         cv_broadcast(&spa_namespace_cv);
 681 
 682         if (spa->spa_root) {
 683                 spa_strfree(spa->spa_root);
 684                 spa_active_count--;
 685         }
 686 
 687         while ((dp = list_head(&spa->spa_config_list)) != NULL) {
 688                 list_remove(&spa->spa_config_list, dp);
 689                 if (dp->scd_path != NULL)
 690                         spa_strfree(dp->scd_path);
 691                 kmem_free(dp, sizeof (spa_config_dirent_t));
 692         }
 693 
 694         list_destroy(&spa->spa_config_list);
 695 


 697         nvlist_free(spa->spa_load_info);
 698         spa_config_set(spa, NULL);
 699 
 700         mutex_enter(&cpu_lock);
 701         if (spa->spa_deadman_cycid != CYCLIC_NONE)
 702                 cyclic_remove(spa->spa_deadman_cycid);
 703         mutex_exit(&cpu_lock);
 704         spa->spa_deadman_cycid = CYCLIC_NONE;
 705 
 706         refcount_destroy(&spa->spa_refcount);
 707 
 708         spa_config_lock_destroy(spa);
 709 
 710         kstat_delete(spa->spa_iokstat);
 711         spa->spa_iokstat = NULL;
 712 
 713         for (int t = 0; t < TXG_SIZE; t++)
 714                 bplist_destroy(&spa->spa_free_bplist[t]);
 715 
 716         cv_destroy(&spa->spa_async_cv);
 717         cv_destroy(&spa->spa_evicting_os_cv);
 718         cv_destroy(&spa->spa_proc_cv);
 719         cv_destroy(&spa->spa_scrub_io_cv);
 720         cv_destroy(&spa->spa_suspend_cv);
 721 
 722         mutex_destroy(&spa->spa_async_lock);
 723         mutex_destroy(&spa->spa_errlist_lock);
 724         mutex_destroy(&spa->spa_errlog_lock);
 725         mutex_destroy(&spa->spa_evicting_os_lock);
 726         mutex_destroy(&spa->spa_history_lock);
 727         mutex_destroy(&spa->spa_proc_lock);
 728         mutex_destroy(&spa->spa_props_lock);
 729         mutex_destroy(&spa->spa_scrub_lock);
 730         mutex_destroy(&spa->spa_suspend_lock);
 731         mutex_destroy(&spa->spa_vdev_top_lock);
 732         mutex_destroy(&spa->spa_iokstat_lock);
 733 
 734         kmem_free(spa, sizeof (spa_t));
 735 }
 736 
 737 /*
 738  * Given a pool, return the next pool in the namespace, or NULL if there is
 739  * none.  If 'prev' is NULL, return the first pool.
 740  */
 741 spa_t *
 742 spa_next(spa_t *prev)
 743 {
 744         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 745 


 763 spa_open_ref(spa_t *spa, void *tag)
 764 {
 765         ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
 766             MUTEX_HELD(&spa_namespace_lock));
 767         (void) refcount_add(&spa->spa_refcount, tag);
 768 }
 769 
 770 /*
 771  * Remove a reference to the given spa_t.  Must have at least one reference, or
 772  * have the namespace lock held.
 773  */
 774 void
 775 spa_close(spa_t *spa, void *tag)
 776 {
 777         ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref ||
 778             MUTEX_HELD(&spa_namespace_lock));
 779         (void) refcount_remove(&spa->spa_refcount, tag);
 780 }
 781 
 782 /*
 783  * Remove a reference to the given spa_t held by a dsl dir that is
 784  * being asynchronously released.  Async releases occur from a taskq
 785  * performing eviction of dsl datasets and dirs.  The namespace lock
 786  * isn't held and the hold by the object being evicted may contribute to
 787  * spa_minref (e.g. dataset or directory released during pool export),
 788  * so the asserts in spa_close() do not apply.
 789  */
 790 void
 791 spa_async_close(spa_t *spa, void *tag)
 792 {
 793         (void) refcount_remove(&spa->spa_refcount, tag);
 794 }
 795 
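To illustrate where spa_async_close() is expected to be used, here is a minimal sketch of an eviction-taskq callback dropping its pool hold; the callback and its argument handling are hypothetical, and only the spa_async_close() call itself comes from this change:

/*
 * Hypothetical taskq callback finishing the asynchronous eviction of a
 * dsl_dir_t.  It runs without spa_namespace_lock, and its hold may be
 * one of the spa_minref references, so spa_close()'s assertions would
 * not be safe here.
 */
static void
example_dsl_dir_evict_cb(void *arg)
{
        dsl_dir_t *dd = arg;
        spa_t *spa = dd->dd_pool->dp_spa;

        /* ... release the dsl_dir_t's own resources ... */

        /* Drop the pool hold without the spa_close() assertions. */
        spa_async_close(spa, dd);
}
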
 796 /*
 797  * Check to see if the spa refcount is zero.  Must be called with
 798  * spa_namespace_lock held.  We really compare against spa_minref, which is the
 799  * number of references acquired when opening a pool
 800  */
 801 boolean_t
 802 spa_refcount_zero(spa_t *spa)
 803 {
 804         ASSERT(MUTEX_HELD(&spa_namespace_lock));
 805 
 806         return (refcount_count(&spa->spa_refcount) == spa->spa_minref);
 807 }
 808 
 809 /*
 810  * ==========================================================================
 811  * SPA spare and l2cache tracking
 812  * ==========================================================================
 813  */
 814 
 815 /*
 816  * Hot spares and cache devices are tracked using the same code below,


1672 }
1673 
1674 boolean_t
1675 spa_deflate(spa_t *spa)
1676 {
1677         return (spa->spa_deflate);
1678 }
1679 
1680 metaslab_class_t *
1681 spa_normal_class(spa_t *spa)
1682 {
1683         return (spa->spa_normal_class);
1684 }
1685 
1686 metaslab_class_t *
1687 spa_log_class(spa_t *spa)
1688 {
1689         return (spa->spa_log_class);
1690 }
1691 
1692 void
1693 spa_evicting_os_register(spa_t *spa, objset_t *os)
1694 {
1695         mutex_enter(&spa->spa_evicting_os_lock);
1696         list_insert_head(&spa->spa_evicting_os_list, os);
1697         mutex_exit(&spa->spa_evicting_os_lock);
1698 }
1699 
1700 void
1701 spa_evicting_os_deregister(spa_t *spa, objset_t *os)
1702 {
1703         mutex_enter(&spa->spa_evicting_os_lock);
1704         list_remove(&spa->spa_evicting_os_list, os);
1705         cv_broadcast(&spa->spa_evicting_os_cv);
1706         mutex_exit(&spa->spa_evicting_os_lock);
1707 }
1708 
1709 void
1710 spa_evicting_os_wait(spa_t *spa)
1711 {
1712         mutex_enter(&spa->spa_evicting_os_lock);
1713         while (!list_is_empty(&spa->spa_evicting_os_list))
1714                 cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock);
1715         mutex_exit(&spa->spa_evicting_os_lock);
1716 
1717         dmu_buf_user_evict_wait();
1718 }
1719 
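The three functions above form a small register/deregister/wait protocol. A minimal sketch of the intended lifecycle follows; the wrapper functions are illustrative only, and only the spa_evicting_os_*() calls themselves come from this change:

/* Called when an objset's destruction/eviction goes asynchronous. */
static void
example_evict_begin(spa_t *spa, objset_t *os)
{
        spa_evicting_os_register(spa, os);
}

/* Called once that eviction has finished tearing the objset down. */
static void
example_evict_done(spa_t *spa, objset_t *os)
{
        /* Removes os from the list and wakes spa_evicting_os_wait(). */
        spa_evicting_os_deregister(spa, os);
}

/* Called on the pool teardown path before the spa_t can be freed. */
static void
example_pool_teardown(spa_t *spa)
{
        /*
         * Blocks until every registered objset is deregistered and any
         * pending dbuf user evictions have drained, so an in-flight
         * eviction cannot dereference a freed spa_t.
         */
        spa_evicting_os_wait(spa);
}
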
1720 int
1721 spa_max_replication(spa_t *spa)
1722 {
1723         /*
1724          * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
1725          * handle BPs with more than one DVA allocated.  Set our max
1726          * replication level accordingly.
1727          */
1728         if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
1729                 return (1);
1730         return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
1731 }
1732 
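For concreteness, a short worked example of spa_max_replication(), under the assumption that SPA_DVAS_PER_BP is 3 and spa_max_replication_override is left at its usual default of SPA_DVAS_PER_BP (neither value is shown in this excerpt):

/*
 * spa_version(spa) <  SPA_VERSION_DITTO_BLOCKS  -> 1 DVA per block pointer
 * spa_version(spa) >= SPA_VERSION_DITTO_BLOCKS  -> MIN(3, 3) == 3 DVAs
 */
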
1733 int
1734 spa_prev_software_version(spa_t *spa)
1735 {
1736         return (spa->spa_prev_software_version);
1737 }
1738 
1739 uint64_t