Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/spa_misc.c
          +++ new/usr/src/uts/common/fs/zfs/spa_misc.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  24   24   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
       25 + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  25   26   */
  26   27  
  27   28  #include <sys/zfs_context.h>
  28   29  #include <sys/spa_impl.h>
  29   30  #include <sys/spa_boot.h>
  30   31  #include <sys/zio.h>
  31   32  #include <sys/zio_checksum.h>
  32   33  #include <sys/zio_compress.h>
  33   34  #include <sys/dmu.h>
  34   35  #include <sys/dmu_tx.h>
↓ open down ↓ 509 lines elided ↑ open up ↑
 544  545          cyc_handler_t hdlr;
 545  546          cyc_time_t when;
 546  547  
 547  548          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 548  549  
 549  550          spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
 550  551  
 551  552          mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
 552  553          mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
 553  554          mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
      555 +        mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL);
 554  556          mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
 555  557          mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
 556  558          mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
 557  559          mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
 558  560          mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
 559  561          mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
 560  562          mutex_init(&spa->spa_iokstat_lock, NULL, MUTEX_DEFAULT, NULL);
 561  563  
 562  564          cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
      565 +        cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
 563  566          cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
 564  567          cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
 565  568          cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
 566  569  
 567  570          for (int t = 0; t < TXG_SIZE; t++)
 568  571                  bplist_create(&spa->spa_free_bplist[t]);
 569  572  
 570  573          (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
 571  574          spa->spa_state = POOL_STATE_UNINITIALIZED;
 572  575          spa->spa_freeze_txg = UINT64_MAX;
↓ open down ↓ 89 lines elided ↑ open up ↑
 662  665   * spa_namespace_lock.  This is called only after the spa_t has been closed and
 663  666   * deactivated.
 664  667   */
 665  668  void
 666  669  spa_remove(spa_t *spa)
 667  670  {
 668  671          spa_config_dirent_t *dp;
 669  672  
 670  673          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 671  674          ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
      675 +        ASSERT3U(refcount_count(&spa->spa_refcount), ==, 0);
 672  676  
 673  677          nvlist_free(spa->spa_config_splitting);
 674  678  
 675  679          avl_remove(&spa_namespace_avl, spa);
 676  680          cv_broadcast(&spa_namespace_cv);
 677  681  
 678  682          if (spa->spa_root) {
 679  683                  spa_strfree(spa->spa_root);
 680  684                  spa_active_count--;
 681  685          }
↓ open down ↓ 21 lines elided ↑ open up ↑
 703  707  
 704  708          spa_config_lock_destroy(spa);
 705  709  
 706  710          kstat_delete(spa->spa_iokstat);
 707  711          spa->spa_iokstat = NULL;
 708  712  
 709  713          for (int t = 0; t < TXG_SIZE; t++)
 710  714                  bplist_destroy(&spa->spa_free_bplist[t]);
 711  715  
 712  716          cv_destroy(&spa->spa_async_cv);
      717 +        cv_destroy(&spa->spa_evicting_os_cv);
 713  718          cv_destroy(&spa->spa_proc_cv);
 714  719          cv_destroy(&spa->spa_scrub_io_cv);
 715  720          cv_destroy(&spa->spa_suspend_cv);
 716  721  
 717  722          mutex_destroy(&spa->spa_async_lock);
 718  723          mutex_destroy(&spa->spa_errlist_lock);
 719  724          mutex_destroy(&spa->spa_errlog_lock);
      725 +        mutex_destroy(&spa->spa_evicting_os_lock);
 720  726          mutex_destroy(&spa->spa_history_lock);
 721  727          mutex_destroy(&spa->spa_proc_lock);
 722  728          mutex_destroy(&spa->spa_props_lock);
 723  729          mutex_destroy(&spa->spa_scrub_lock);
 724  730          mutex_destroy(&spa->spa_suspend_lock);
 725  731          mutex_destroy(&spa->spa_vdev_top_lock);
 726  732          mutex_destroy(&spa->spa_iokstat_lock);
 727  733  
 728  734          kmem_free(spa, sizeof (spa_t));
 729  735  }
↓ open down ↓ 37 lines elided ↑ open up ↑
 767  773   */
 768  774  void
 769  775  spa_close(spa_t *spa, void *tag)
 770  776  {
                   /*
                    * Sanity check: the caller must either hold
                    * spa_namespace_lock, or the current reference count must
                    * be strictly greater than spa_minref.
                    */
 771  777          ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref ||
 772  778              MUTEX_HELD(&spa_namespace_lock));
                   /*
                    * Drop the reference recorded under 'tag'; the value
                    * returned by refcount_remove() is deliberately discarded.
                    */
 773  779          (void) refcount_remove(&spa->spa_refcount, tag);
 774  780  }
 775  781  
 776  782  /*
      783 + * Remove a reference to the given spa_t held by a dsl dir that is
      784 + * being asynchronously released.  Async releases occur from a taskq
      785 + * performing eviction of dsl datasets and dirs.  The namespace lock
      786 + * isn't held and the hold by the object being evicted may contribute to
      787 + * spa_minref (e.g. dataset or directory released during pool export),
      788 + * so the asserts in spa_close() do not apply.
      789 + */
      790 +void
      791 +spa_async_close(spa_t *spa, void *tag)
      792 +{
                   /*
                    * Unlike spa_close(), no ASSERT precedes the removal: the
                    * reference recorded under 'tag' is dropped
                    * unconditionally, and the value returned by
                    * refcount_remove() is discarded.
                    */
      793 +        (void) refcount_remove(&spa->spa_refcount, tag);
      794 +}
      795 +
      796 +/*
 777  797   * Check to see if the spa refcount is zero.  Must be called with
 778  798   * spa_namespace_lock held.  We really compare against spa_minref, which is the
 779  799   * number of references acquired when opening a pool.
 780  800   */
 781  801  boolean_t
 782  802  spa_refcount_zero(spa_t *spa)
 783  803  {
 784  804          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 785  805  
 786  806          return (refcount_count(&spa->spa_refcount) == spa->spa_minref);
↓ open down ↓ 875 lines elided ↑ open up ↑
1662 1682  {
1663 1683          return (spa->spa_normal_class);
1664 1684  }
1665 1685  
1666 1686  metaslab_class_t *
1667 1687  spa_log_class(spa_t *spa)
1668 1688  {
                   /* Plain accessor: hand back the pool's spa_log_class field. */
1669 1689          return (spa->spa_log_class);
1670 1690  }
1671 1691  
     1692 +void
     1693 +spa_evicting_os_register(spa_t *spa, objset_t *os)
     1694 +{
                   /*
                    * Add 'os' to the head of spa_evicting_os_list.  The
                    * insertion is done with spa_evicting_os_lock held.
                    */
     1695 +        mutex_enter(&spa->spa_evicting_os_lock);
     1696 +        list_insert_head(&spa->spa_evicting_os_list, os);
     1697 +        mutex_exit(&spa->spa_evicting_os_lock);
     1698 +}
     1699 +
     1700 +void
     1701 +spa_evicting_os_deregister(spa_t *spa, objset_t *os)
     1702 +{
                   /*
                    * Remove 'os' from spa_evicting_os_list and, still holding
                    * spa_evicting_os_lock, broadcast on spa_evicting_os_cv so
                    * that threads blocked in spa_evicting_os_wait() wake up
                    * and re-check whether the list is empty.
                    */
     1703 +        mutex_enter(&spa->spa_evicting_os_lock);
     1704 +        list_remove(&spa->spa_evicting_os_list, os);
     1705 +        cv_broadcast(&spa->spa_evicting_os_cv);
     1706 +        mutex_exit(&spa->spa_evicting_os_lock);
     1707 +}
     1708 +
     1709 +void
     1710 +spa_evicting_os_wait(spa_t *spa)
     1711 +{
                   /*
                    * Block until spa_evicting_os_list is empty.  The emptiness
                    * test is repeated after every wakeup, so a wakeup that
                    * arrives while entries remain simply goes back to waiting
                    * on spa_evicting_os_cv.
                    */
     1712 +        mutex_enter(&spa->spa_evicting_os_lock);
     1713 +        while (!list_is_empty(&spa->spa_evicting_os_list))
     1714 +                cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock);
     1715 +        mutex_exit(&spa->spa_evicting_os_lock);
     1716 +
                   /*
                    * Called only after spa_evicting_os_lock has been dropped.
                    * NOTE(review): dmu_buf_user_evict_wait() is defined
                    * elsewhere; presumably it waits for outstanding dbuf user
                    * eviction work to finish -- confirm against its definition.
                    */
     1717 +        dmu_buf_user_evict_wait();
     1718 +}
     1719 +
1672 1720  int
1673 1721  spa_max_replication(spa_t *spa)
1674 1722  {
1675 1723          /*
1676 1724           * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
1677 1725           * handle BPs with more than one DVA allocated.  Set our max
1678 1726           * replication level accordingly.
1679 1727           */
1680 1728          if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
1681 1729                  return (1);
↓ open down ↓ 293 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX