Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>


 358     dmu_objset_type_t ostype,
 359     dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx);
 360 
 /*
  * Hand the block pointer bp, tagged with transaction group txg, to
  * zio_free() on the spa that belongs to pool dp (dp->dp_spa).
  * Returns nothing; any result of zio_free() is not examined here.
  */
 361 void
 362 dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp)
 363 {
 364         zio_free(dp->dp_spa, txg, bp);
 365 }
 366 
 /*
  * Variant of dsl_free() that asserts dsl_pool_sync_context(dp).
  *
  * Builds a zio with zio_free_sync() for block pointer bpp in txg, passing
  * pio and reusing pio->io_flags as the flags argument, then issues it with
  * zio_nowait() (this function does not wait on the zio).
  *
  * pio is dereferenced unconditionally, so callers must pass a non-NULL zio
  * (presumably the parent zio -- confirm against zio_free_sync()).
  */
 367 void
 368 dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
 369 {
 370         ASSERT(dsl_pool_sync_context(dp));
 371         zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
 372 }
 373 
 /*
  * Return the upper txg bound to use when scanning dataset ds.
  *
  * The starting value is scn_phys.scn_max_txg of the scan attached to the
  * dataset's pool.  When dsl_dataset_is_snapshot(ds) is true, the result is
  * the smaller of that value and the dataset's ds_creation_txg; otherwise
  * scn_max_txg is returned unchanged.
  */
 374 static uint64_t
 375 dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
 376 {
 377         uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg;
 378         if (dsl_dataset_is_snapshot(ds))
 379                 return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg));
 380         return (smt);
 381 }
 382 
 /*
  * Write scn->scn_phys into the pool's meta objset as part of tx.
  *
  * The data is stored with zap_update() under the DMU_POOL_SCAN name in
  * DMU_POOL_DIRECTORY_OBJECT, as SCAN_PHYS_NUMINTS integers of
  * sizeof (uint64_t) bytes each.  The call is wrapped in VERIFY0, so a
  * non-zero return from zap_update() is not tolerated.
  */
 383 static void
 384 dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
 385 {
 386         VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
 387             DMU_POOL_DIRECTORY_OBJECT,
 388             DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
 389             &scn->scn_phys, tx));
 390 }
 391 
 392 extern int zfs_vdev_async_write_active_min_dirty_percent;
 393 
 394 static boolean_t
 395 dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
 396 {
 397         /* we never skip user/group accounting objects */
 398         if (zb && (int64_t)zb->zb_object < 0)


 794 
 795         SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 796             ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 797         dsl_scan_visitbp(bp, &zb, NULL,
 798             ds, scn, DMU_OST_NONE, tx);
 799 
 800         dprintf_ds(ds, "finished scan%s", "");
 801 }
 802 
 803 void
 804 dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
 805 {
 806         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 807         dsl_scan_t *scn = dp->dp_scan;
 808         uint64_t mintxg;
 809 
 810         if (scn->scn_phys.scn_state != DSS_SCANNING)
 811                 return;
 812 
 813         if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
 814                 if (dsl_dataset_is_snapshot(ds)) {
 815                         /* Note, scn_cur_{min,max}_txg stays the same. */
 816                         scn->scn_phys.scn_bookmark.zb_objset =
 817                             dsl_dataset_phys(ds)->ds_next_snap_obj;
 818                         zfs_dbgmsg("destroying ds %llu; currently traversing; "
 819                             "reset zb_objset to %llu",
 820                             (u_longlong_t)ds->ds_object,
 821                             (u_longlong_t)dsl_dataset_phys(ds)->
 822                             ds_next_snap_obj);
 823                         scn->scn_phys.scn_flags |= DSF_VISIT_DS_AGAIN;
 824                 } else {
 825                         SET_BOOKMARK(&scn->scn_phys.scn_bookmark,
 826                             ZB_DESTROYED_OBJSET, 0, 0, 0);
 827                         zfs_dbgmsg("destroying ds %llu; currently traversing; "
 828                             "reset bookmark to -1,0,0,0",
 829                             (u_longlong_t)ds->ds_object);
 830                 }
 831         } else if (zap_lookup_int_key(dp->dp_meta_objset,
 832             scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) {
 833                 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
 834                 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
 835                     scn->scn_phys.scn_queue_obj, ds->ds_object, tx));
 836                 if (dsl_dataset_is_snapshot(ds)) {
 837                         /*
 838                          * We keep the same mintxg; it could be >
 839                          * ds_creation_txg if the previous snapshot was
 840                          * deleted too.
 841                          */
 842                         VERIFY(zap_add_int_key(dp->dp_meta_objset,
 843                             scn->scn_phys.scn_queue_obj,
 844                             dsl_dataset_phys(ds)->ds_next_snap_obj,
 845                             mintxg, tx) == 0);
 846                         zfs_dbgmsg("destroying ds %llu; in queue; "
 847                             "replacing with %llu",
 848                             (u_longlong_t)ds->ds_object,
 849                             (u_longlong_t)dsl_dataset_phys(ds)->
 850                             ds_next_snap_obj);
 851                 } else {
 852                         zfs_dbgmsg("destroying ds %llu; in queue; removing",
 853                             (u_longlong_t)ds->ds_object);
 854                 }
 855         } else {
 856                 zfs_dbgmsg("destroying ds %llu; ignoring",


1000 
1001 static void
1002 dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
1003 {
1004         dsl_pool_t *dp = scn->scn_dp;
1005         dsl_dataset_t *ds;
1006         objset_t *os;
1007 
1008         VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
1009 
1010         if (dmu_objset_from_ds(ds, &os))
1011                 goto out;
1012 
1013         /*
1014          * Only the ZIL in the head (non-snapshot) is valid.  Even though
1015          * snapshots can have ZIL block pointers (which may be the same
1016          * BP as in the head), they must be ignored.  So we traverse the
1017          * ZIL here, rather than in scan_recurse(), because the regular
1018          * snapshot block-sharing rules don't apply to it.
1019          */
1020         if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !dsl_dataset_is_snapshot(ds))
1021                 dsl_scan_zil(dp, &os->os_zil_header);
1022 
1023         /*
1024          * Iterate over the bps in this ds.
1025          */
1026         dmu_buf_will_dirty(ds->ds_dbuf, tx);
1027         dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx);
1028 
1029         char *dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP);
1030         dsl_dataset_name(ds, dsname);
1031         zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
1032             "pausing=%u",
1033             (longlong_t)dsobj, dsname,
1034             (longlong_t)scn->scn_phys.scn_cur_min_txg,
1035             (longlong_t)scn->scn_phys.scn_cur_max_txg,
1036             (int)scn->scn_pausing);
1037         kmem_free(dsname, ZFS_MAXNAMELEN);
1038 
1039         if (scn->scn_pausing)
1040                 goto out;




 358     dmu_objset_type_t ostype,
 359     dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx);
 360 
 /*
  * Hand the block pointer bp, tagged with transaction group txg, to
  * zio_free() on the spa that belongs to pool dp (dp->dp_spa).
  * Returns nothing; any result of zio_free() is not examined here.
  */
 361 void
 362 dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp)
 363 {
 364         zio_free(dp->dp_spa, txg, bp);
 365 }
 366 
 /*
  * Variant of dsl_free() that asserts dsl_pool_sync_context(dp).
  *
  * Builds a zio with zio_free_sync() for block pointer bpp in txg, passing
  * pio and reusing pio->io_flags as the flags argument, then issues it with
  * zio_nowait() (this function does not wait on the zio).
  *
  * pio is dereferenced unconditionally, so callers must pass a non-NULL zio
  * (presumably the parent zio -- confirm against zio_free_sync()).
  */
 367 void
 368 dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
 369 {
 370         ASSERT(dsl_pool_sync_context(dp));
 371         zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
 372 }
 373 
 /*
  * Return the upper txg bound to use when scanning dataset ds.
  *
  * The starting value is scn_phys.scn_max_txg of the scan attached to the
  * dataset's pool.  When the dataset's ds_is_snapshot field is set, the
  * result is the smaller of that value and the dataset's ds_creation_txg;
  * otherwise scn_max_txg is returned unchanged.
  */
 374 static uint64_t
 375 dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
 376 {
 377         uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg;
 378         if (ds->ds_is_snapshot)
 379                 return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg));
 380         return (smt);
 381 }
 382 
 /*
  * Write scn->scn_phys into the pool's meta objset as part of tx.
  *
  * The data is stored with zap_update() under the DMU_POOL_SCAN name in
  * DMU_POOL_DIRECTORY_OBJECT, as SCAN_PHYS_NUMINTS integers of
  * sizeof (uint64_t) bytes each.  The call is wrapped in VERIFY0, so a
  * non-zero return from zap_update() is not tolerated.
  */
 383 static void
 384 dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
 385 {
 386         VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
 387             DMU_POOL_DIRECTORY_OBJECT,
 388             DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
 389             &scn->scn_phys, tx));
 390 }
 391 
 392 extern int zfs_vdev_async_write_active_min_dirty_percent;
 393 
 394 static boolean_t
 395 dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
 396 {
 397         /* we never skip user/group accounting objects */
 398         if (zb && (int64_t)zb->zb_object < 0)


 794 
 795         SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 796             ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 797         dsl_scan_visitbp(bp, &zb, NULL,
 798             ds, scn, DMU_OST_NONE, tx);
 799 
 800         dprintf_ds(ds, "finished scan%s", "");
 801 }
 802 
 803 void
 804 dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
 805 {
 806         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 807         dsl_scan_t *scn = dp->dp_scan;
 808         uint64_t mintxg;
 809 
 810         if (scn->scn_phys.scn_state != DSS_SCANNING)
 811                 return;
 812 
 813         if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
 814                 if (ds->ds_is_snapshot) {
 815                         /* Note, scn_cur_{min,max}_txg stays the same. */
 816                         scn->scn_phys.scn_bookmark.zb_objset =
 817                             dsl_dataset_phys(ds)->ds_next_snap_obj;
 818                         zfs_dbgmsg("destroying ds %llu; currently traversing; "
 819                             "reset zb_objset to %llu",
 820                             (u_longlong_t)ds->ds_object,
 821                             (u_longlong_t)dsl_dataset_phys(ds)->
 822                             ds_next_snap_obj);
 823                         scn->scn_phys.scn_flags |= DSF_VISIT_DS_AGAIN;
 824                 } else {
 825                         SET_BOOKMARK(&scn->scn_phys.scn_bookmark,
 826                             ZB_DESTROYED_OBJSET, 0, 0, 0);
 827                         zfs_dbgmsg("destroying ds %llu; currently traversing; "
 828                             "reset bookmark to -1,0,0,0",
 829                             (u_longlong_t)ds->ds_object);
 830                 }
 831         } else if (zap_lookup_int_key(dp->dp_meta_objset,
 832             scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) {
 833                 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
 834                 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
 835                     scn->scn_phys.scn_queue_obj, ds->ds_object, tx));
 836                 if (ds->ds_is_snapshot) {
 837                         /*
 838                          * We keep the same mintxg; it could be >
 839                          * ds_creation_txg if the previous snapshot was
 840                          * deleted too.
 841                          */
 842                         VERIFY(zap_add_int_key(dp->dp_meta_objset,
 843                             scn->scn_phys.scn_queue_obj,
 844                             dsl_dataset_phys(ds)->ds_next_snap_obj,
 845                             mintxg, tx) == 0);
 846                         zfs_dbgmsg("destroying ds %llu; in queue; "
 847                             "replacing with %llu",
 848                             (u_longlong_t)ds->ds_object,
 849                             (u_longlong_t)dsl_dataset_phys(ds)->
 850                             ds_next_snap_obj);
 851                 } else {
 852                         zfs_dbgmsg("destroying ds %llu; in queue; removing",
 853                             (u_longlong_t)ds->ds_object);
 854                 }
 855         } else {
 856                 zfs_dbgmsg("destroying ds %llu; ignoring",


1000 
1001 static void
1002 dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
1003 {
1004         dsl_pool_t *dp = scn->scn_dp;
1005         dsl_dataset_t *ds;
1006         objset_t *os;
1007 
1008         VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
1009 
1010         if (dmu_objset_from_ds(ds, &os))
1011                 goto out;
1012 
1013         /*
1014          * Only the ZIL in the head (non-snapshot) is valid.  Even though
1015          * snapshots can have ZIL block pointers (which may be the same
1016          * BP as in the head), they must be ignored.  So we traverse the
1017          * ZIL here, rather than in scan_recurse(), because the regular
1018          * snapshot block-sharing rules don't apply to it.
1019          */
1020         if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !ds->ds_is_snapshot)
1021                 dsl_scan_zil(dp, &os->os_zil_header);
1022 
1023         /*
1024          * Iterate over the bps in this ds.
1025          */
1026         dmu_buf_will_dirty(ds->ds_dbuf, tx);
1027         dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx);
1028 
1029         char *dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP);
1030         dsl_dataset_name(ds, dsname);
1031         zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
1032             "pausing=%u",
1033             (longlong_t)dsobj, dsname,
1034             (longlong_t)scn->scn_phys.scn_cur_min_txg,
1035             (longlong_t)scn->scn_phys.scn_cur_max_txg,
1036             (int)scn->scn_pausing);
1037         kmem_free(dsname, ZFS_MAXNAMELEN);
1038 
1039         if (scn->scn_pausing)
1040                 goto out;