Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  25  * Copyright (c) 2013, Joyent, Inc. All rights reserved.

  26  */
  27 
  28 /* Portions Copyright 2010 Robert Milkowski */
  29 
  30 #include <sys/cred.h>
  31 #include <sys/zfs_context.h>
  32 #include <sys/dmu_objset.h>
  33 #include <sys/dsl_dir.h>
  34 #include <sys/dsl_dataset.h>
  35 #include <sys/dsl_prop.h>
  36 #include <sys/dsl_pool.h>
  37 #include <sys/dsl_synctask.h>
  38 #include <sys/dsl_deleg.h>
  39 #include <sys/dnode.h>
  40 #include <sys/dbuf.h>
  41 #include <sys/zvol.h>
  42 #include <sys/dmu_tx.h>
  43 #include <sys/zap.h>
  44 #include <sys/zil.h>
  45 #include <sys/dmu_impl.h>


 338                     &os->os_phys_buf, ARC_BUFC_METADATA);
 339                 os->os_phys = os->os_phys_buf->b_data;
 340                 bzero(os->os_phys, size);
 341         }
 342 
 343         /*
 344          * Note: the changed_cb will be called once before the register
 345          * func returns, thus changing the checksum/compression from the
 346          * default (fletcher2/off).  Snapshots don't need to know about
 347          * checksum/compression/copies.
 348          */
 349         if (ds != NULL) {
 350                 err = dsl_prop_register(ds,
 351                     zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
 352                     primary_cache_changed_cb, os);
 353                 if (err == 0) {
 354                         err = dsl_prop_register(ds,
 355                             zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
 356                             secondary_cache_changed_cb, os);
 357                 }
 358                 if (!dsl_dataset_is_snapshot(ds)) {
 359                         if (err == 0) {
 360                                 err = dsl_prop_register(ds,
 361                                     zfs_prop_to_name(ZFS_PROP_CHECKSUM),
 362                                     checksum_changed_cb, os);
 363                         }
 364                         if (err == 0) {
 365                                 err = dsl_prop_register(ds,
 366                                     zfs_prop_to_name(ZFS_PROP_COMPRESSION),
 367                                     compression_changed_cb, os);
 368                         }
 369                         if (err == 0) {
 370                                 err = dsl_prop_register(ds,
 371                                     zfs_prop_to_name(ZFS_PROP_COPIES),
 372                                     copies_changed_cb, os);
 373                         }
 374                         if (err == 0) {
 375                                 err = dsl_prop_register(ds,
 376                                     zfs_prop_to_name(ZFS_PROP_DEDUP),
 377                                     dedup_changed_cb, os);
 378                         }


 400                 }
 401                 if (err != 0) {
 402                         VERIFY(arc_buf_remove_ref(os->os_phys_buf,
 403                             &os->os_phys_buf));
 404                         kmem_free(os, sizeof (objset_t));
 405                         return (err);
 406                 }
 407         } else {
 408                 /* It's the meta-objset. */
 409                 os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
 410                 os->os_compress = ZIO_COMPRESS_LZJB;
 411                 os->os_copies = spa_max_replication(spa);
 412                 os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
 413                 os->os_dedup_verify = B_FALSE;
 414                 os->os_logbias = ZFS_LOGBIAS_LATENCY;
 415                 os->os_sync = ZFS_SYNC_STANDARD;
 416                 os->os_primary_cache = ZFS_CACHE_ALL;
 417                 os->os_secondary_cache = ZFS_CACHE_ALL;
 418         }
 419 
 420         if (ds == NULL || !dsl_dataset_is_snapshot(ds))
 421                 os->os_zil_header = os->os_phys->os_zil_header;
 422         os->os_zil = zil_alloc(os, &os->os_zil_header);
 423 
 424         for (i = 0; i < TXG_SIZE; i++) {
 425                 list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
 426                     offsetof(dnode_t, dn_dirty_link[i]));
 427                 list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
 428                     offsetof(dnode_t, dn_dirty_link[i]));
 429         }
 430         list_create(&os->os_dnodes, sizeof (dnode_t),
 431             offsetof(dnode_t, dn_link));
 432         list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
 433             offsetof(dmu_buf_impl_t, db_link));
 434 
 435         mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
 436         mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
 437         mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 438 
 439         DMU_META_DNODE(os) = dnode_special_open(os,
 440             &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
 441             &os->os_meta_dnode);
 442         if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
 443                 DMU_USERUSED_DNODE(os) = dnode_special_open(os,
 444                     &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
 445                     &os->os_userused_dnode);
 446                 DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
 447                     &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
 448                     &os->os_groupused_dnode);
 449         }
 450 
 451         *osp = os;
 452         return (0);
 453 }
 454 
 455 int
 456 dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
 457 {
 458         int err = 0;
 459 
 460         mutex_enter(&ds->ds_opening_lock);
 461         if (ds->ds_objset == NULL) {
 462                 objset_t *os;
 463                 err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
 464                     ds, dsl_dataset_get_blkptr(ds), &os);
 465 
 466                 if (err == 0) {
 467                         mutex_enter(&ds->ds_lock);
 468                         ASSERT(ds->ds_objset == NULL);


 516         dsl_pool_t *dp;
 517         dsl_dataset_t *ds;
 518         int err;
 519 
 520         err = dsl_pool_hold(name, FTAG, &dp);
 521         if (err != 0)
 522                 return (err);
 523         err = dsl_dataset_own(dp, name, tag, &ds);
 524         if (err != 0) {
 525                 dsl_pool_rele(dp, FTAG);
 526                 return (err);
 527         }
 528 
 529         err = dmu_objset_from_ds(ds, osp);
 530         dsl_pool_rele(dp, FTAG);
 531         if (err != 0) {
 532                 dsl_dataset_disown(ds, tag);
 533         } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
 534                 dsl_dataset_disown(ds, tag);
 535                 return (SET_ERROR(EINVAL));
 536         } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
 537                 dsl_dataset_disown(ds, tag);
 538                 return (SET_ERROR(EROFS));
 539         }
 540         return (err);
 541 }
 542 
  543 void
  544 dmu_objset_rele(objset_t *os, void *tag)
  545 {
              /*
               * Release the dataset hold and then the pool hold, both using
               * 'tag'.  The pool pointer is fetched before the dataset hold is
               * dropped; presumably 'os' must not be dereferenced once that
               * hold is gone -- TODO confirm.
               */
  546         dsl_pool_t *dp = dmu_objset_pool(os);
  547         dsl_dataset_rele(os->os_dsl_dataset, tag);
  548         dsl_pool_rele(dp, tag);
  549 }
 550 
 551 /*
 552  * When we are called, os MUST refer to an objset associated with a dataset
 553  * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
 554  * == tag.  We will then release and reacquire ownership of the dataset while
  555  * holding the pool config_rwlock so that no intervening namespace or
  556  * ownership changes may occur.


 572         VERIFY(dsl_dataset_long_held(ds));
 573 
 574         dsl_dataset_name(ds, name);
 575         dp = dmu_objset_pool(os);
 576         dsl_pool_config_enter(dp, FTAG);
 577         dmu_objset_disown(os, tag);
 578         VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
 579         VERIFY3P(newds, ==, os->os_dsl_dataset);
 580         dsl_pool_config_exit(dp, FTAG);
 581 }
 582 
  583 void
  584 dmu_objset_disown(objset_t *os, void *tag)
  585 {
              /*
               * Thin wrapper: disown the dataset backing 'os' on behalf of
               * 'tag' by calling dsl_dataset_disown().
               */
  586         dsl_dataset_disown(os->os_dsl_dataset, tag);
  587 }
 588 
  589 void
  590 dmu_objset_evict_dbufs(objset_t *os)
  591 {

              /*
               * Walk os->os_dnodes and call dnode_evict_dbufs() on every dnode
               * to which a hold can be added.  os_lock is dropped around each
               * dnode_evict_dbufs() call; before dropping it, a hold is taken
               * on the next eligible dnode so that the walk has a held dnode
               * to resume from once the lock is reacquired.
               */
  592         dnode_t *dn;
  593 
  594         mutex_enter(&os->os_lock);
  595 
  596         /* process the mdn last, since the other dnodes have holds on it */
  597         list_remove(&os->os_dnodes, DMU_META_DNODE(os));
  598         list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
  599 
  600         /*
  601          * Find the first dnode with holds.  We have to do this dance
  602          * because dnode_add_ref() only works if you already have a
  603          * hold.  If there are no holds then it has no dbufs so OK to
  604          * skip.
  605          */
  606         for (dn = list_head(&os->os_dnodes);
  607             dn && !dnode_add_ref(dn, FTAG);
  608             dn = list_next(&os->os_dnodes, dn))
  609                 continue;
  610 
  611         while (dn) {
  612                 dnode_t *next_dn = dn;
  613 
                      /* Advance to the next dnode that accepts a new hold. */
  614                 do {
  615                         next_dn = list_next(&os->os_dnodes, next_dn);
  616                 } while (next_dn && !dnode_add_ref(next_dn, FTAG));
  617 
  618                 mutex_exit(&os->os_lock);

  619                 dnode_evict_dbufs(dn);
  620                 dnode_rele(dn, FTAG);

  621                 mutex_enter(&os->os_lock);
  622                 dn = next_dn;



  623         }

  624         mutex_exit(&os->os_lock);






  625 }
 626 













  627 void
  628 dmu_objset_evict(objset_t *os)
  629 {
              /*
               * Tear down an in-core objset in a single pass: unregister the
               * property callbacks from its dataset (if it has one), tear down
               * SA state, evict dbufs, close the special dnodes, free the ZIL,
               * drop the reference on os_phys_buf, destroy the locks and free
               * the objset_t itself.  The objset must not be dirty in any txg
               * (asserted below).
               */
  630         dsl_dataset_t *ds = os->os_dsl_dataset;
  631 
  632         for (int t = 0; t < TXG_SIZE; t++)
  633                 ASSERT(!dmu_objset_is_dirty(os, t));
  634 
  635         if (ds) {
                      /* These callbacks are only unregistered for non-snapshots. */
  636                 if (!dsl_dataset_is_snapshot(ds)) {
  637                         VERIFY0(dsl_prop_unregister(ds,
  638                             zfs_prop_to_name(ZFS_PROP_CHECKSUM),
  639                             checksum_changed_cb, os));
  640                         VERIFY0(dsl_prop_unregister(ds,
  641                             zfs_prop_to_name(ZFS_PROP_COMPRESSION),
  642                             compression_changed_cb, os));
  643                         VERIFY0(dsl_prop_unregister(ds,
  644                             zfs_prop_to_name(ZFS_PROP_COPIES),
  645                             copies_changed_cb, os));
  646                         VERIFY0(dsl_prop_unregister(ds,
  647                             zfs_prop_to_name(ZFS_PROP_DEDUP),
  648                             dedup_changed_cb, os));
  649                         VERIFY0(dsl_prop_unregister(ds,
  650                             zfs_prop_to_name(ZFS_PROP_LOGBIAS),
  651                             logbias_changed_cb, os));
  652                         VERIFY0(dsl_prop_unregister(ds,
  653                             zfs_prop_to_name(ZFS_PROP_SYNC),
  654                             sync_changed_cb, os));
  655                         VERIFY0(dsl_prop_unregister(ds,
  656                             zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
  657                             redundant_metadata_changed_cb, os));
  658                         VERIFY0(dsl_prop_unregister(ds,
  659                             zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
  660                             recordsize_changed_cb, os));
  661                 }
                      /* The cache callbacks are unregistered for every dataset. */
  662                 VERIFY0(dsl_prop_unregister(ds,
  663                     zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
  664                     primary_cache_changed_cb, os));
  665                 VERIFY0(dsl_prop_unregister(ds,
  666                     zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
  667                     secondary_cache_changed_cb, os));
  668         }
  669 
  670         if (os->os_sa)
  671                 sa_tear_down(os);
  672 

  673         dmu_objset_evict_dbufs(os);
  674 















  675         dnode_special_close(&os->os_meta_dnode);
              /* The user/group accounting dnodes are closed only if present. */
  676         if (DMU_USERUSED_DNODE(os)) {
  677                 dnode_special_close(&os->os_userused_dnode);
  678                 dnode_special_close(&os->os_groupused_dnode);
  679         }
  680         zil_free(os->os_zil);
  681 
              /* By this point no dnode may remain on the objset's dnode list. */
  682         ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
  683 
  684         VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
  685 
  686         /*
  687          * This is a barrier to prevent the objset from going away in
  688          * dnode_move() until we can safely ensure that the objset is still in
  689          * use. We consider the objset valid before the barrier and invalid
  690          * after the barrier.
  691          */
  692         rw_enter(&os_lock, RW_READER);
  693         rw_exit(&os_lock);
  694 
  695         mutex_destroy(&os->os_lock);
  696         mutex_destroy(&os->os_obj_lock);
  697         mutex_destroy(&os->os_user_ptr_lock);

  698         kmem_free(os, sizeof (objset_t));
  699 }
 700 
  701 timestruc_t
  702 dmu_objset_snap_cmtime(objset_t *os)
  703 {
              /*
               * Return dsl_dir_snap_cmtime() for the dsl_dir of the objset's
               * dataset.  os->os_dsl_dataset is dereferenced unconditionally,
               * so 'os' must be an objset that has a dataset.
               */
  704         return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
  705 }
 706 
 707 /* called from dsl for meta-objset */
 708 objset_t *
 709 dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 710     dmu_objset_type_t type, dmu_tx_t *tx)
 711 {
 712         objset_t *os;
 713         dnode_t *mdn;
 714 
 715         ASSERT(dmu_tx_is_syncing(tx));
 716 
 717         if (ds != NULL)


 886         }
 887         error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
 888             doca->doca_cred);
 889         if (error != 0) {
 890                 dsl_dir_rele(pdd, FTAG);
 891                 return (SET_ERROR(EDQUOT));
 892         }
 893         dsl_dir_rele(pdd, FTAG);
 894 
 895         error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
 896         if (error != 0)
 897                 return (error);
 898 
 899         /* You can't clone across pools. */
 900         if (origin->ds_dir->dd_pool != dp) {
 901                 dsl_dataset_rele(origin, FTAG);
 902                 return (SET_ERROR(EXDEV));
 903         }
 904 
 905         /* You can only clone snapshots, not the head datasets. */
 906         if (!dsl_dataset_is_snapshot(origin)) {
 907                 dsl_dataset_rele(origin, FTAG);
 908                 return (SET_ERROR(EINVAL));
 909         }
 910         dsl_dataset_rele(origin, FTAG);
 911 
 912         return (0);
 913 }
 914 
 915 static void
 916 dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
 917 {
 918         dmu_objset_clone_arg_t *doca = arg;
 919         dsl_pool_t *dp = dmu_tx_pool(tx);
 920         dsl_dir_t *pdd;
 921         const char *tail;
 922         dsl_dataset_t *origin, *ds;
 923         uint64_t obj;
 924         char namebuf[MAXNAMELEN];
 925 
 926         VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));


1450 
 1451 void
 1452 dmu_objset_stats(objset_t *os, nvlist_t *nv)
 1453 {
              /*
               * Add statistics for 'os' to 'nv': the dataset's stats when the
               * objset has a dataset, followed by the objset type and the
               * result of dmu_objset_userspace_present().  An objset without
               * a dataset is expected to be of type DMU_OST_META (asserted).
               */
 1454         ASSERT(os->os_dsl_dataset ||
 1455             os->os_phys->os_type == DMU_OST_META);
 1456 
 1457         if (os->os_dsl_dataset != NULL)
 1458                 dsl_dataset_stats(os->os_dsl_dataset, nv);
 1459 
 1460         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
 1461             os->os_phys->os_type);
 1462         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
 1463             dmu_objset_userspace_present(os));
 1464 }
1465 
 1466 int
 1467 dmu_objset_is_snapshot(objset_t *os)
 1468 {
              /*
               * Report whether the objset's dataset is a snapshot.  An objset
               * with no dataset is never a snapshot, so B_FALSE is returned.
               */
 1469         if (os->os_dsl_dataset != NULL)
 1470                 return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
 1471         else
 1472                 return (B_FALSE);
 1473 }
1474 
 1475 int
 1476 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
 1477     boolean_t *conflict)
 1478 {
              /*
               * Look up 'name' in the snapshot-names ZAP object of the objset's
               * dataset using zap_lookup_norm() with MT_FIRST, passing 'real',
               * 'maxlen' and 'conflict' through to it.  The looked-up value is
               * read into 'ignored' and discarded.  Returns ENOENT when the
               * dataset has no snapshot-names ZAP object; otherwise returns
               * whatever zap_lookup_norm() returns.
               * NOTE(review): presumably 'real' receives the stored form of
               * the name and 'conflict' reports a normalization collision --
               * confirm against zap_lookup_norm().
               */
 1479         dsl_dataset_t *ds = os->os_dsl_dataset;
 1480         uint64_t ignored;
 1481 
 1482         if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
 1483                 return (SET_ERROR(ENOENT));
 1484 
 1485         return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
 1486             dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
 1487             MT_FIRST, real, maxlen, conflict));
 1488 }
1489 
1490 int




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  25  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  26  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  27  */
  28 
  29 /* Portions Copyright 2010 Robert Milkowski */
  30 
  31 #include <sys/cred.h>
  32 #include <sys/zfs_context.h>
  33 #include <sys/dmu_objset.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/dsl_dataset.h>
  36 #include <sys/dsl_prop.h>
  37 #include <sys/dsl_pool.h>
  38 #include <sys/dsl_synctask.h>
  39 #include <sys/dsl_deleg.h>
  40 #include <sys/dnode.h>
  41 #include <sys/dbuf.h>
  42 #include <sys/zvol.h>
  43 #include <sys/dmu_tx.h>
  44 #include <sys/zap.h>
  45 #include <sys/zil.h>
  46 #include <sys/dmu_impl.h>


 339                     &os->os_phys_buf, ARC_BUFC_METADATA);
 340                 os->os_phys = os->os_phys_buf->b_data;
 341                 bzero(os->os_phys, size);
 342         }
 343 
 344         /*
 345          * Note: the changed_cb will be called once before the register
 346          * func returns, thus changing the checksum/compression from the
 347          * default (fletcher2/off).  Snapshots don't need to know about
 348          * checksum/compression/copies.
 349          */
 350         if (ds != NULL) {
 351                 err = dsl_prop_register(ds,
 352                     zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
 353                     primary_cache_changed_cb, os);
 354                 if (err == 0) {
 355                         err = dsl_prop_register(ds,
 356                             zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
 357                             secondary_cache_changed_cb, os);
 358                 }
 359                 if (!ds->ds_is_snapshot) {
 360                         if (err == 0) {
 361                                 err = dsl_prop_register(ds,
 362                                     zfs_prop_to_name(ZFS_PROP_CHECKSUM),
 363                                     checksum_changed_cb, os);
 364                         }
 365                         if (err == 0) {
 366                                 err = dsl_prop_register(ds,
 367                                     zfs_prop_to_name(ZFS_PROP_COMPRESSION),
 368                                     compression_changed_cb, os);
 369                         }
 370                         if (err == 0) {
 371                                 err = dsl_prop_register(ds,
 372                                     zfs_prop_to_name(ZFS_PROP_COPIES),
 373                                     copies_changed_cb, os);
 374                         }
 375                         if (err == 0) {
 376                                 err = dsl_prop_register(ds,
 377                                     zfs_prop_to_name(ZFS_PROP_DEDUP),
 378                                     dedup_changed_cb, os);
 379                         }


 401                 }
 402                 if (err != 0) {
 403                         VERIFY(arc_buf_remove_ref(os->os_phys_buf,
 404                             &os->os_phys_buf));
 405                         kmem_free(os, sizeof (objset_t));
 406                         return (err);
 407                 }
 408         } else {
 409                 /* It's the meta-objset. */
 410                 os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
 411                 os->os_compress = ZIO_COMPRESS_LZJB;
 412                 os->os_copies = spa_max_replication(spa);
 413                 os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
 414                 os->os_dedup_verify = B_FALSE;
 415                 os->os_logbias = ZFS_LOGBIAS_LATENCY;
 416                 os->os_sync = ZFS_SYNC_STANDARD;
 417                 os->os_primary_cache = ZFS_CACHE_ALL;
 418                 os->os_secondary_cache = ZFS_CACHE_ALL;
 419         }
 420 
 421         if (ds == NULL || !ds->ds_is_snapshot)
 422                 os->os_zil_header = os->os_phys->os_zil_header;
 423         os->os_zil = zil_alloc(os, &os->os_zil_header);
 424 
 425         for (i = 0; i < TXG_SIZE; i++) {
 426                 list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
 427                     offsetof(dnode_t, dn_dirty_link[i]));
 428                 list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
 429                     offsetof(dnode_t, dn_dirty_link[i]));
 430         }
 431         list_create(&os->os_dnodes, sizeof (dnode_t),
 432             offsetof(dnode_t, dn_link));
 433         list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
 434             offsetof(dmu_buf_impl_t, db_link));
 435 
 436         mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
 437         mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
 438         mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 439 
 440         dnode_special_open(os, &os->os_phys->os_meta_dnode,
 441             DMU_META_DNODE_OBJECT, &os->os_meta_dnode);

 442         if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
 443                 dnode_special_open(os, &os->os_phys->os_userused_dnode,
 444                     DMU_USERUSED_OBJECT, &os->os_userused_dnode);
 445                 dnode_special_open(os, &os->os_phys->os_groupused_dnode,
 446                     DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);


 447         }
 448 
 449         *osp = os;
 450         return (0);
 451 }
 452 
 453 int
 454 dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
 455 {
 456         int err = 0;
 457 
 458         mutex_enter(&ds->ds_opening_lock);
 459         if (ds->ds_objset == NULL) {
 460                 objset_t *os;
 461                 err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
 462                     ds, dsl_dataset_get_blkptr(ds), &os);
 463 
 464                 if (err == 0) {
 465                         mutex_enter(&ds->ds_lock);
 466                         ASSERT(ds->ds_objset == NULL);


 514         dsl_pool_t *dp;
 515         dsl_dataset_t *ds;
 516         int err;
 517 
 518         err = dsl_pool_hold(name, FTAG, &dp);
 519         if (err != 0)
 520                 return (err);
 521         err = dsl_dataset_own(dp, name, tag, &ds);
 522         if (err != 0) {
 523                 dsl_pool_rele(dp, FTAG);
 524                 return (err);
 525         }
 526 
 527         err = dmu_objset_from_ds(ds, osp);
 528         dsl_pool_rele(dp, FTAG);
 529         if (err != 0) {
 530                 dsl_dataset_disown(ds, tag);
 531         } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
 532                 dsl_dataset_disown(ds, tag);
 533                 return (SET_ERROR(EINVAL));
 534         } else if (!readonly && ds->ds_is_snapshot) {
 535                 dsl_dataset_disown(ds, tag);
 536                 return (SET_ERROR(EROFS));
 537         }
 538         return (err);
 539 }
 540 
  541 void
  542 dmu_objset_rele(objset_t *os, void *tag)
  543 {
              /*
               * Release the dataset hold and then the pool hold, both using
               * 'tag'.  The pool pointer is fetched before the dataset hold is
               * dropped; presumably 'os' must not be dereferenced once that
               * hold is gone -- TODO confirm.
               */
  544         dsl_pool_t *dp = dmu_objset_pool(os);
  545         dsl_dataset_rele(os->os_dsl_dataset, tag);
  546         dsl_pool_rele(dp, tag);
  547 }
 548 
 549 /*
 550  * When we are called, os MUST refer to an objset associated with a dataset
 551  * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
 552  * == tag.  We will then release and reacquire ownership of the dataset while
  553  * holding the pool config_rwlock so that no intervening namespace or
  554  * ownership changes may occur.


 570         VERIFY(dsl_dataset_long_held(ds));
 571 
 572         dsl_dataset_name(ds, name);
 573         dp = dmu_objset_pool(os);
 574         dsl_pool_config_enter(dp, FTAG);
 575         dmu_objset_disown(os, tag);
 576         VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
 577         VERIFY3P(newds, ==, os->os_dsl_dataset);
 578         dsl_pool_config_exit(dp, FTAG);
 579 }
 580 
  581 void
  582 dmu_objset_disown(objset_t *os, void *tag)
  583 {
              /*
               * Thin wrapper: disown the dataset backing 'os' on behalf of
               * 'tag' by calling dsl_dataset_disown().
               */
  584         dsl_dataset_disown(os->os_dsl_dataset, tag);
  585 }
 586 
  587 void
  588 dmu_objset_evict_dbufs(objset_t *os)
  589 {
              /*
               * Walk os->os_dnodes and call dnode_evict_dbufs() on every dnode
               * to which a hold can be added, then evict the dbufs of the
               * special dnodes.  os_lock is dropped around each
               * dnode_evict_dbufs() call; dn_marker, a dnode_t on this stack
               * frame, is linked into the list right after the current dnode
               * beforehand so the walk can resume from the marker's successor
               * once the lock is reacquired.
               */
  590         dnode_t dn_marker;
  591         dnode_t *dn;
  592 
  593         mutex_enter(&os->os_lock);
  594         dn = list_head(&os->os_dnodes);
  595         while (dn != NULL) {



  596                 /*
  597                  * Skip dnodes without holds.  We have to do this dance
  598                  * because dnode_add_ref() only works if there is already a
  599                  * hold.  If the dnode has no holds, then it has no dbufs.

  600                  */
  601                 if (dnode_add_ref(dn, FTAG)) {
                              /* Remember our position before dropping os_lock. */
  602                         list_insert_after(&os->os_dnodes, dn, &dn_marker);










  603                         mutex_exit(&os->os_lock);
  604 
  605                         dnode_evict_dbufs(dn);
  606                         dnode_rele(dn, FTAG);
  607 
  608                         mutex_enter(&os->os_lock);
                              /* Resume after the marker, then unlink the marker. */
  609                         dn = list_next(&os->os_dnodes, &dn_marker);
  610                         list_remove(&os->os_dnodes, &dn_marker);
  611                 } else {
  612                         dn = list_next(&os->os_dnodes, dn);
  613                 }
  614         }
  615         mutex_exit(&os->os_lock);
  616 
              /*
               * The special dnodes are evicted explicitly, outside os_lock:
               * the accounting dnodes when present, and the meta dnode last.
               */
  617         if (DMU_USERUSED_DNODE(os) != NULL) {
  618                 dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
  619                 dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
  620         }
  621         dnode_evict_dbufs(DMU_META_DNODE(os));
  622 }
 623 
  624 /*
  625  * Objset eviction processing is split into two pieces.
  626  * The first marks the objset as evicting, evicts any dbufs that
  627  * have a refcount of zero, and then queues up the objset for the
  628  * second phase of eviction.  Once os->os_dnodes has been cleared by
  629  * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
  630  * The second phase closes the special dnodes, dequeues the objset from
  631  * the list of those undergoing eviction, and finally frees the objset.
  632  *
  633  * NOTE: Due to asynchronous eviction processing (invocation of
  634  *       dnode_buf_pageout()), it is possible for the meta dnode for the
  635  *       objset to have no holds even though os->os_dnodes is not empty.
  636  */
  637 void
  638 dmu_objset_evict(objset_t *os)
  639 {
  640         dsl_dataset_t *ds = os->os_dsl_dataset;
  641 
              /* The objset must not be dirty in any txg. */
  642         for (int t = 0; t < TXG_SIZE; t++)
  643                 ASSERT(!dmu_objset_is_dirty(os, t));
  644 
  645         if (ds) {
                      /* These callbacks are only unregistered for non-snapshots. */
  646                 if (!ds->ds_is_snapshot) {
  647                         VERIFY0(dsl_prop_unregister(ds,
  648                             zfs_prop_to_name(ZFS_PROP_CHECKSUM),
  649                             checksum_changed_cb, os));
  650                         VERIFY0(dsl_prop_unregister(ds,
  651                             zfs_prop_to_name(ZFS_PROP_COMPRESSION),
  652                             compression_changed_cb, os));
  653                         VERIFY0(dsl_prop_unregister(ds,
  654                             zfs_prop_to_name(ZFS_PROP_COPIES),
  655                             copies_changed_cb, os));
  656                         VERIFY0(dsl_prop_unregister(ds,
  657                             zfs_prop_to_name(ZFS_PROP_DEDUP),
  658                             dedup_changed_cb, os));
  659                         VERIFY0(dsl_prop_unregister(ds,
  660                             zfs_prop_to_name(ZFS_PROP_LOGBIAS),
  661                             logbias_changed_cb, os));
  662                         VERIFY0(dsl_prop_unregister(ds,
  663                             zfs_prop_to_name(ZFS_PROP_SYNC),
  664                             sync_changed_cb, os));
  665                         VERIFY0(dsl_prop_unregister(ds,
  666                             zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
  667                             redundant_metadata_changed_cb, os));
  668                         VERIFY0(dsl_prop_unregister(ds,
  669                             zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
  670                             recordsize_changed_cb, os));
  671                 }
                      /* The cache callbacks are unregistered for every dataset. */
  672                 VERIFY0(dsl_prop_unregister(ds,
  673                     zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
  674                     primary_cache_changed_cb, os));
  675                 VERIFY0(dsl_prop_unregister(ds,
  676                     zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
  677                     secondary_cache_changed_cb, os));
  678         }
  679 
  680         if (os->os_sa)
  681                 sa_tear_down(os);
  682 
              /* First phase: flag the objset as evicting, then evict dbufs. */
  683         os->os_evicting = B_TRUE;
  684         dmu_objset_evict_dbufs(os);
  685 
              /*
               * Register the objset with the spa while holding os_lock.  If no
               * dnodes remain on os_dnodes, run the second phase right away;
               * otherwise just drop the lock and return without running it.
               */
  686         mutex_enter(&os->os_lock);
  687         spa_evicting_os_register(os->os_spa, os);
  688         if (list_is_empty(&os->os_dnodes)) {
  689                 mutex_exit(&os->os_lock);
  690                 dmu_objset_evict_done(os);
  691         } else {
  692                 mutex_exit(&os->os_lock);
  693         }
  694 }
 695 
  696 void
  697 dmu_objset_evict_done(objset_t *os)
  698 {
              /*
               * Final teardown of an objset whose dnode list is already empty
               * (asserted): close the special dnodes, free the ZIL, drop the
               * reference on os_phys_buf, destroy the locks, deregister the
               * objset from the spa and free the objset_t.
               */
  699         ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
  700 
  701         dnode_special_close(&os->os_meta_dnode);
              /* The user/group accounting dnodes are closed only if present. */
  702         if (DMU_USERUSED_DNODE(os)) {
  703                 dnode_special_close(&os->os_userused_dnode);
  704                 dnode_special_close(&os->os_groupused_dnode);
  705         }
  706         zil_free(os->os_zil);
  707 


  708         VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
  709 
  710         /*
  711          * This is a barrier to prevent the objset from going away in
  712          * dnode_move() until we can safely ensure that the objset is still in
  713          * use. We consider the objset valid before the barrier and invalid
  714          * after the barrier.
  715          */
  716         rw_enter(&os_lock, RW_READER);
  717         rw_exit(&os_lock);
  718 
  719         mutex_destroy(&os->os_lock);
  720         mutex_destroy(&os->os_obj_lock);
  721         mutex_destroy(&os->os_user_ptr_lock);
              /* Deregister while 'os' is still valid; it is freed next. */
  722         spa_evicting_os_deregister(os->os_spa, os);
  723         kmem_free(os, sizeof (objset_t));
  724 }
 725 
  726 timestruc_t
  727 dmu_objset_snap_cmtime(objset_t *os)
  728 {
              /*
               * Return dsl_dir_snap_cmtime() for the dsl_dir of the objset's
               * dataset.  os->os_dsl_dataset is dereferenced unconditionally,
               * so 'os' must be an objset that has a dataset.
               */
  729         return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
  730 }
 731 
 732 /* called from dsl for meta-objset */
 733 objset_t *
 734 dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 735     dmu_objset_type_t type, dmu_tx_t *tx)
 736 {
 737         objset_t *os;
 738         dnode_t *mdn;
 739 
 740         ASSERT(dmu_tx_is_syncing(tx));
 741 
 742         if (ds != NULL)


 911         }
 912         error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
 913             doca->doca_cred);
 914         if (error != 0) {
 915                 dsl_dir_rele(pdd, FTAG);
 916                 return (SET_ERROR(EDQUOT));
 917         }
 918         dsl_dir_rele(pdd, FTAG);
 919 
 920         error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
 921         if (error != 0)
 922                 return (error);
 923 
 924         /* You can't clone across pools. */
 925         if (origin->ds_dir->dd_pool != dp) {
 926                 dsl_dataset_rele(origin, FTAG);
 927                 return (SET_ERROR(EXDEV));
 928         }
 929 
 930         /* You can only clone snapshots, not the head datasets. */
 931         if (!origin->ds_is_snapshot) {
 932                 dsl_dataset_rele(origin, FTAG);
 933                 return (SET_ERROR(EINVAL));
 934         }
 935         dsl_dataset_rele(origin, FTAG);
 936 
 937         return (0);
 938 }
 939 
 940 static void
 941 dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
 942 {
 943         dmu_objset_clone_arg_t *doca = arg;
 944         dsl_pool_t *dp = dmu_tx_pool(tx);
 945         dsl_dir_t *pdd;
 946         const char *tail;
 947         dsl_dataset_t *origin, *ds;
 948         uint64_t obj;
 949         char namebuf[MAXNAMELEN];
 950 
 951         VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));


1475 
 1476 void
 1477 dmu_objset_stats(objset_t *os, nvlist_t *nv)
 1478 {
              /*
               * Add statistics for 'os' to 'nv': the dataset's stats when the
               * objset has a dataset, followed by the objset type and the
               * result of dmu_objset_userspace_present().  An objset without
               * a dataset is expected to be of type DMU_OST_META (asserted).
               */
 1479         ASSERT(os->os_dsl_dataset ||
 1480             os->os_phys->os_type == DMU_OST_META);
 1481 
 1482         if (os->os_dsl_dataset != NULL)
 1483                 dsl_dataset_stats(os->os_dsl_dataset, nv);
 1484 
 1485         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
 1486             os->os_phys->os_type);
 1487         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
 1488             dmu_objset_userspace_present(os));
 1489 }
1490 
 1491 int
 1492 dmu_objset_is_snapshot(objset_t *os)
 1493 {
              /*
               * Report whether the objset's dataset is a snapshot, as recorded
               * in the dataset's ds_is_snapshot field.  An objset with no
               * dataset is never a snapshot, so B_FALSE is returned.
               */
 1494         if (os->os_dsl_dataset != NULL)
 1495                 return (os->os_dsl_dataset->ds_is_snapshot);
 1496         else
 1497                 return (B_FALSE);
 1498 }
1499 
 1500 int
 1501 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
 1502     boolean_t *conflict)
 1503 {
              /*
               * Look up 'name' in the snapshot-names ZAP object of the objset's
               * dataset using zap_lookup_norm() with MT_FIRST, passing 'real',
               * 'maxlen' and 'conflict' through to it.  The looked-up value is
               * read into 'ignored' and discarded.  Returns ENOENT when the
               * dataset has no snapshot-names ZAP object; otherwise returns
               * whatever zap_lookup_norm() returns.
               * NOTE(review): presumably 'real' receives the stored form of
               * the name and 'conflict' reports a normalization collision --
               * confirm against zap_lookup_norm().
               */
 1504         dsl_dataset_t *ds = os->os_dsl_dataset;
 1505         uint64_t ignored;
 1506 
 1507         if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
 1508                 return (SET_ERROR(ENOENT));
 1509 
 1510         return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
 1511             dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
 1512             MT_FIRST, real, maxlen, conflict));
 1513 }
1514 
1515 int