36 #include <sys/zfs_context.h>
37 #include <sys/dmu_objset.h>
38 #include <sys/dsl_dir.h>
39 #include <sys/dsl_dataset.h>
40 #include <sys/dsl_prop.h>
41 #include <sys/dsl_pool.h>
42 #include <sys/dsl_synctask.h>
43 #include <sys/dsl_deleg.h>
44 #include <sys/dnode.h>
45 #include <sys/dbuf.h>
46 #include <sys/zvol.h>
47 #include <sys/dmu_tx.h>
48 #include <sys/zap.h>
49 #include <sys/zil.h>
50 #include <sys/dmu_impl.h>
51 #include <sys/zfs_ioctl.h>
52 #include <sys/sa.h>
53 #include <sys/zfs_onexit.h>
54 #include <sys/dsl_destroy.h>
55 #include <sys/vdev.h>
56 #include <sys/zfeature.h>
57
58 /*
59 * Needed to close a window in dnode_move() that allows the objset to be freed
60 * before it can be safely accessed.
61 */
62 krwlock_t os_lock;
63
64 /*
65 * Tunable to override the maximum number of threads for the parallelization
66 * of dmu_objset_find_dp, needed to speed up the import of pools with many
67 * datasets.
68 * Default is 4 times the number of leaf vdevs.
69 */
70 int dmu_find_threads = 0;
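/*
 * A minimal sketch, assuming the usual illumos tunable mechanism (not
 * verified for this particular build): the value can be overridden at
 * boot via /etc/system, e.g.
 *
 *	set zfs:dmu_find_threads = 16
 */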
71
72 /*
73 * Backfill lower metadnode objects after this many have been freed.
74 * Backfilling negatively impacts object creation rates, so only do it
75 * if there are enough holes to fill.
76 */
77 int dmu_rescan_dnode_threshold = 131072;
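/*
 * A rough sketch of the mechanism, as described in the change that
 * introduced this tunable: freed dnodes are counted per objset, and
 * once the count crosses this threshold the object allocator restarts
 * its scan from low object numbers to backfill the holes instead of
 * always appending new objects.
 */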
78
79 static void dmu_objset_find_dp_cb(void *arg);
80
81 void
82 dmu_objset_init(void)
83 {
84 rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
85 }
86
87 void
88 dmu_objset_fini(void)
89 {
90 rw_destroy(&os_lock);
91 }
92
93 spa_t *
94 dmu_objset_spa(objset_t *os)
95 {
96 return (os->os_spa);
97 }
98
99 zilog_t *
100 dmu_objset_zil(objset_t *os)
101 {
102 return (os->os_zil);
103 }
104
105 dsl_pool_t *
106 dmu_objset_pool(objset_t *os)
107 {
108 dsl_dataset_t *ds;
109
110 if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
162 ASSERT(newval != ZIO_CHECKSUM_INHERIT);
163
164 os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
165 }
166
167 static void
168 compression_changed_cb(void *arg, uint64_t newval)
169 {
170 objset_t *os = arg;
171
172 /*
173 * Inheritance and range checking should have been done by now.
174 */
175 ASSERT(newval != ZIO_COMPRESS_INHERIT);
176
177 os->os_compress = zio_compress_select(os->os_spa, newval,
178 ZIO_COMPRESS_ON);
179 }
180
181 static void
182 copies_changed_cb(void *arg, uint64_t newval)
183 {
184 objset_t *os = arg;
185
186 /*
187 * Inheritance and range checking should have been done by now.
188 */
189 ASSERT(newval > 0);
190 ASSERT(newval <= spa_max_replication(os->os_spa));
191
192 os->os_copies = newval;
193 }
194
195 static void
196 dedup_changed_cb(void *arg, uint64_t newval)
197 {
198 objset_t *os = arg;
199 spa_t *spa = os->os_spa;
200 enum zio_checksum checksum;
201
216 objset_t *os = arg;
217
218 /*
219 * Inheritance and range checking should have been done by now.
220 */
221 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
222 newval == ZFS_CACHE_METADATA);
223
224 os->os_primary_cache = newval;
225 }
226
227 static void
228 secondary_cache_changed_cb(void *arg, uint64_t newval)
229 {
230 objset_t *os = arg;
231
232 /*
233 * Inheritance and range checking should have been done by now.
234 */
235 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
236 newval == ZFS_CACHE_METADATA);
237
238 os->os_secondary_cache = newval;
239 }
240
241 static void
242 sync_changed_cb(void *arg, uint64_t newval)
243 {
244 objset_t *os = arg;
245
246 /*
247 * Inheritance and range checking should have been done by now.
248 */
249 ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
250 newval == ZFS_SYNC_DISABLED);
251
252 os->os_sync = newval;
253 if (os->os_zil)
254 zil_set_sync(os->os_zil, newval);
255 }
256
257 static void
258 redundant_metadata_changed_cb(void *arg, uint64_t newval)
259 {
260 objset_t *os = arg;
261
332 dnode_multilist_index_func(multilist_t *ml, void *obj)
333 {
334 dnode_t *dn = obj;
335 return (dnode_hash(dn->dn_objset, dn->dn_object) %
336 multilist_get_num_sublists(ml));
337 }
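/*
 * The index function above makes sublist placement deterministic: a
 * given dnode always hashes (by objset and object number) to the same
 * sublist, so concurrent inserts and removes of one dnode serialize on
 * a single sublist lock instead of scanning every sublist.
 */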
338
339 /*
340 * Instantiates the objset_t in-memory structure corresponding to the
341 * objset_phys_t that's pointed to by the specified blkptr_t.
342 */
343 int
344 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
345 objset_t **osp)
346 {
347 objset_t *os;
348 int i, err;
349
350 ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
351
352 /*
353 * The $ORIGIN dataset (if it exists) doesn't have an associated
354 * objset, so there's no reason to open it. The $ORIGIN dataset
355 * will not exist on pools older than SPA_VERSION_ORIGIN.
356 */
357 if (ds != NULL && spa_get_dsl(spa) != NULL &&
358 spa_get_dsl(spa)->dp_origin_snap != NULL) {
359 ASSERT3P(ds->ds_dir, !=,
360 spa_get_dsl(spa)->dp_origin_snap->ds_dir);
361 }
362
363 os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
364 os->os_dsl_dataset = ds;
365 os->os_spa = spa;
366 os->os_rootbp = bp;
367 if (!BP_IS_HOLE(os->os_rootbp)) {
368 arc_flags_t aflags = ARC_FLAG_WAIT;
369 zbookmark_phys_t zb;
370 SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
371 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
372
373 if (DMU_OS_IS_L2CACHEABLE(os))
374 aflags |= ARC_FLAG_L2CACHE;
375
376 dprintf_bp(os->os_rootbp, "reading %s", "");
377 err = arc_read(NULL, spa, os->os_rootbp,
378 arc_getbuf_func, &os->os_phys_buf,
379 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
380 if (err != 0) {
381 kmem_free(os, sizeof (objset_t));
382 /* convert checksum errors into IO errors */
417 if (ds != NULL) {
418 boolean_t needlock = B_FALSE;
419
420 /*
421 * Note: it's valid to open the objset if the dataset is
422 * long-held, in which case the pool_config lock will not
423 * be held.
424 */
425 if (!dsl_pool_config_held(dmu_objset_pool(os))) {
426 needlock = B_TRUE;
427 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
428 }
429 err = dsl_prop_register(ds,
430 zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
431 primary_cache_changed_cb, os);
432 if (err == 0) {
433 err = dsl_prop_register(ds,
434 zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
435 secondary_cache_changed_cb, os);
436 }
437 if (!ds->ds_is_snapshot) {
438 if (err == 0) {
439 err = dsl_prop_register(ds,
440 zfs_prop_to_name(ZFS_PROP_CHECKSUM),
441 checksum_changed_cb, os);
442 }
443 if (err == 0) {
444 err = dsl_prop_register(ds,
445 zfs_prop_to_name(ZFS_PROP_COMPRESSION),
446 compression_changed_cb, os);
447 }
448 if (err == 0) {
449 err = dsl_prop_register(ds,
450 zfs_prop_to_name(ZFS_PROP_COPIES),
451 copies_changed_cb, os);
452 }
453 if (err == 0) {
454 err = dsl_prop_register(ds,
455 zfs_prop_to_name(ZFS_PROP_DEDUP),
456 dedup_changed_cb, os);
457 }
458 if (err == 0) {
459 err = dsl_prop_register(ds,
460 zfs_prop_to_name(ZFS_PROP_LOGBIAS),
461 logbias_changed_cb, os);
462 }
463 if (err == 0) {
464 err = dsl_prop_register(ds,
465 zfs_prop_to_name(ZFS_PROP_SYNC),
466 sync_changed_cb, os);
467 }
468 if (err == 0) {
469 err = dsl_prop_register(ds,
470 zfs_prop_to_name(
471 ZFS_PROP_REDUNDANT_METADATA),
472 redundant_metadata_changed_cb, os);
473 }
474 if (err == 0) {
475 err = dsl_prop_register(ds,
476 zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
477 recordsize_changed_cb, os);
478 }
479 }
480 if (needlock)
481 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
482 if (err != 0) {
483 arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
484 kmem_free(os, sizeof (objset_t));
485 return (err);
486 }
487 } else {
488 /* It's the meta-objset. */
489 os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
490 os->os_compress = ZIO_COMPRESS_ON;
491 os->os_copies = spa_max_replication(spa);
492 os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
493 os->os_dedup_verify = B_FALSE;
494 os->os_logbias = ZFS_LOGBIAS_LATENCY;
495 os->os_sync = ZFS_SYNC_STANDARD;
496 os->os_primary_cache = ZFS_CACHE_ALL;
497 os->os_secondary_cache = ZFS_CACHE_ALL;
498 }
499
500 if (ds == NULL || !ds->ds_is_snapshot)
501 os->os_zil_header = os->os_phys->os_zil_header;
502 os->os_zil = zil_alloc(os, &os->os_zil_header);
503
504 for (i = 0; i < TXG_SIZE; i++) {
505 os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
506 offsetof(dnode_t, dn_dirty_link[i]),
507 dnode_multilist_index_func);
508 }
509 list_create(&os->os_dnodes, sizeof (dnode_t),
510 offsetof(dnode_t, dn_link));
511 list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
512 offsetof(dmu_buf_impl_t, db_link));
513
514 mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
515 mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
516 mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
517 mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
518
697 }
698
699 void
700 dmu_objset_evict_dbufs(objset_t *os)
701 {
702 dnode_t dn_marker;
703 dnode_t *dn;
704
705 mutex_enter(&os->os_lock);
706 dn = list_head(&os->os_dnodes);
707 while (dn != NULL) {
708 /*
709 * Skip dnodes without holds. We have to do this dance
710 * because dnode_add_ref() only works if there is already a
711 * hold. If the dnode has no holds, then it has no dbufs.
712 */
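/*
 * The on-stack marker dnode below holds our place in os_dnodes
 * while os_lock is dropped, so the list may change around us
 * without invalidating the iteration.
 */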
713 if (dnode_add_ref(dn, FTAG)) {
714 list_insert_after(&os->os_dnodes, dn, &dn_marker);
715 mutex_exit(&os->os_lock);
716
717 dnode_evict_dbufs(dn);
718 dnode_rele(dn, FTAG);
719
720 mutex_enter(&os->os_lock);
721 dn = list_next(&os->os_dnodes, &dn_marker);
722 list_remove(&os->os_dnodes, &dn_marker);
723 } else {
724 dn = list_next(&os->os_dnodes, dn);
725 }
726 }
727 mutex_exit(&os->os_lock);
728
729 if (DMU_USERUSED_DNODE(os) != NULL) {
730 dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
731 dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
732 }
733 dnode_evict_dbufs(DMU_META_DNODE(os));
734 }
735
736 /*
737 * Objset eviction processing is split into two pieces.
738 * The first marks the objset as evicting, evicts any dbufs that
739 * have a refcount of zero, and then queues up the objset for the
740 * second phase of eviction. Once os->os_dnodes has been cleared by
741 * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
742 * The second phase closes the special dnodes, dequeues the objset from
743 * the list of those undergoing eviction, and finally frees the objset.
744 *
745 * NOTE: Due to asynchronous eviction processing (invocation of
746 * dnode_buf_pageout()), it is possible for the meta dnode for the
747 * objset to have no holds even though os->os_dnodes is not empty.
748 */
749 void
750 dmu_objset_evict(objset_t *os)
751 {
752 dsl_dataset_t *ds = os->os_dsl_dataset;
753
1047 "origin=%s (%llu)", namebuf, origin->ds_object);
1048 dsl_dataset_rele(ds, FTAG);
1049 dsl_dataset_rele(origin, FTAG);
1050 dsl_dir_rele(pdd, FTAG);
1051 }
1052
1053 int
1054 dmu_objset_clone(const char *clone, const char *origin)
1055 {
1056 dmu_objset_clone_arg_t doca;
1057
1058 doca.doca_clone = clone;
1059 doca.doca_origin = origin;
1060 doca.doca_cred = CRED();
1061
1062 return (dsl_sync_task(clone,
1063 dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
1064 5, ZFS_SPACE_CHECK_NORMAL));
1065 }
1066
1067 static int
1068 dmu_objset_remap_indirects_impl(objset_t *os, uint64_t last_removed_txg)
1069 {
1070 int error = 0;
1071 uint64_t object = 0;
1072 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1073 error = dmu_object_remap_indirects(os, object,
1074 last_removed_txg);
1075 /*
1076 * If the ZPL removed the object before we managed to dnode_hold
1077 * it, we would get an ENOENT. If the ZPL declares its intent
1078 * to remove the object (dnode_free) before we manage to
1079 * dnode_hold it, we would get an EEXIST. In either case, we
1080 * want to continue remapping the other objects in the objset;
1081 * in all other cases, we want to break early.
1082 */
1083 if (error != 0 && error != ENOENT && error != EEXIST) {
1084 break;
1085 }
1086 }
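/*
 * dmu_object_next() returns ESRCH once the walk moves past the last
 * allocated object, so ESRCH is the expected loop exit and is not
 * reported to the caller.
 */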
1087 if (error == ESRCH) {
1088 error = 0;
1089 }
1090 return (error);
1091 }
1092
1093 int
1094 dmu_objset_remap_indirects(const char *fsname)
1095 {
1096 int error = 0;
1097 objset_t *os = NULL;
1098 uint64_t last_removed_txg;
1099 uint64_t remap_start_txg;
1100 dsl_dir_t *dd;
1101
1102 error = dmu_objset_hold(fsname, FTAG, &os);
1103 if (error != 0) {
1104 return (error);
1105 }
1106 dd = dmu_objset_ds(os)->ds_dir;
1107
1108 if (!spa_feature_is_enabled(dmu_objset_spa(os),
1109 SPA_FEATURE_OBSOLETE_COUNTS)) {
1110 dmu_objset_rele(os, FTAG);
1111 return (SET_ERROR(ENOTSUP));
1112 }
1113
1114 if (dsl_dataset_is_snapshot(dmu_objset_ds(os))) {
1115 dmu_objset_rele(os, FTAG);
1116 return (SET_ERROR(EINVAL));
1117 }
1118
1119 /*
1120 * If there has not been a removal, we're done.
1121 */
1122 last_removed_txg = spa_get_last_removal_txg(dmu_objset_spa(os));
1123 if (last_removed_txg == -1ULL) {
1124 dmu_objset_rele(os, FTAG);
1125 return (0);
1126 }
1127
1128 /*
1129 * If we have remapped since the last removal, we're done.
1130 */
1131 if (dsl_dir_is_zapified(dd)) {
1132 uint64_t last_remap_txg;
1133 if (zap_lookup(spa_meta_objset(dmu_objset_spa(os)),
1134 dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
1135 sizeof (last_remap_txg), 1, &last_remap_txg) == 0 &&
1136 last_remap_txg > last_removed_txg) {
1137 dmu_objset_rele(os, FTAG);
1138 return (0);
1139 }
1140 }
1141
1142 dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
1143 dsl_pool_rele(dmu_objset_pool(os), FTAG);
1144
1145 remap_start_txg = spa_last_synced_txg(dmu_objset_spa(os));
1146 error = dmu_objset_remap_indirects_impl(os, last_removed_txg);
1147 if (error == 0) {
1148 /*
1149 * We update the last_remap_txg to be the start txg so that
1150 * we can guarantee that every block older than last_remap_txg
1151 * that can be remapped has been remapped.
1152 */
1153 error = dsl_dir_update_last_remap_txg(dd, remap_start_txg);
1154 }
1155
1156 dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
1157 dsl_dataset_rele(dmu_objset_ds(os), FTAG);
1158
1159 return (error);
1160 }
1161
1162 int
1163 dmu_objset_snapshot_one(const char *fsname, const char *snapname)
1164 {
1165 int err;
1166 char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
1167 nvlist_t *snaps = fnvlist_alloc();
1168
1169 fnvlist_add_boolean(snaps, longsnap);
1170 strfree(longsnap);
1171 err = dsl_dataset_snapshot(snaps, NULL, NULL);
1172 fnvlist_free(snaps);
1173 return (err);
1174 }
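/*
 * A usage sketch (dataset names are hypothetical): a caller that needs
 * several snapshots taken atomically can skip this wrapper and build
 * the nvlist itself, since dsl_dataset_snapshot() accepts any number of
 * entries:
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "tank/fs1@backup");
 *	fnvlist_add_boolean(snaps, "tank/fs2@backup");
 *	err = dsl_dataset_snapshot(snaps, NULL, NULL);
 *	fnvlist_free(snaps);
 */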
1175
1176 static void
1177 dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
1178 {
1179 dnode_t *dn;
1180
1181 while ((dn = multilist_sublist_head(list)) != NULL) {
1182 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1296 * This is the MOS. If we have upgraded,
1297 * spa_max_replication() could change, so reset
1298 * os_copies here.
1299 */
1300 os->os_copies = spa_max_replication(os->os_spa);
1301 }
1302
1303 /*
1304 * Create the root block IO
1305 */
1306 SET_BOOKMARK(&zb, os->os_dsl_dataset ?
1307 os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
1308 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1309 arc_release(os->os_phys_buf, &os->os_phys_buf);
1310
1311 dmu_write_policy(os, NULL, 0, 0, &zp);
1312
1313 zio = arc_write(pio, os->os_spa, tx->tx_txg,
1314 blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
1315 &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
1316 os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
1317
1318 /*
1319 * Sync special dnodes - the parent IO for the sync is the root block
1320 */
1321 DMU_META_DNODE(os)->dn_zio = zio;
1322 dnode_sync(DMU_META_DNODE(os), tx);
1323
1324 os->os_phys->os_flags = os->os_flags;
1325
1326 if (DMU_USERUSED_DNODE(os) &&
1327 DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1328 DMU_USERUSED_DNODE(os)->dn_zio = zio;
1329 dnode_sync(DMU_USERUSED_DNODE(os), tx);
1330 DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
1331 dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
1332 }
1333
1334 txgoff = tx->tx_txg & TXG_MASK;
1335
1336 if (dmu_objset_userused_enabled(os)) {
1836 return (os->os_dsl_dataset->ds_is_snapshot);
1837 else
1838 return (B_FALSE);
1839 }
1840
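/*
 * Look up "name" in the dataset's snapshot-name ZAP using
 * normalization-insensitive matching; on success the canonical form is
 * copied into "real" (at most maxlen bytes) and *conflict reports
 * whether another entry normalizes to the same string.
 */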
1841 int
1842 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
1843 boolean_t *conflict)
1844 {
1845 dsl_dataset_t *ds = os->os_dsl_dataset;
1846 uint64_t ignored;
1847
1848 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1849 return (SET_ERROR(ENOENT));
1850
1851 return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
1852 dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
1853 MT_NORMALIZE, real, maxlen, conflict));
1854 }
1855
1856 int
1857 dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
1858 uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
1859 {
1860 dsl_dataset_t *ds = os->os_dsl_dataset;
1861 zap_cursor_t cursor;
1862 zap_attribute_t attr;
1863
1864 ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
1865
1866 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1867 return (SET_ERROR(ENOENT));
1868
1869 zap_cursor_init_serialized(&cursor,
1870 ds->ds_dir->dd_pool->dp_meta_objset,
1871 dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp);
1872
1873 if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1874 zap_cursor_fini(&cursor);
1875 return (SET_ERROR(ENOENT));
36 #include <sys/zfs_context.h>
37 #include <sys/dmu_objset.h>
38 #include <sys/dsl_dir.h>
39 #include <sys/dsl_dataset.h>
40 #include <sys/dsl_prop.h>
41 #include <sys/dsl_pool.h>
42 #include <sys/dsl_synctask.h>
43 #include <sys/dsl_deleg.h>
44 #include <sys/dnode.h>
45 #include <sys/dbuf.h>
46 #include <sys/zvol.h>
47 #include <sys/dmu_tx.h>
48 #include <sys/zap.h>
49 #include <sys/zil.h>
50 #include <sys/dmu_impl.h>
51 #include <sys/zfs_ioctl.h>
52 #include <sys/sa.h>
53 #include <sys/zfs_onexit.h>
54 #include <sys/dsl_destroy.h>
55 #include <sys/vdev.h>
56 #include <sys/wbc.h>
57
58 /*
59 * Needed to close a window in dnode_move() that allows the objset to be freed
60 * before it can be safely accessed.
61 */
62 krwlock_t os_lock;
63
64 extern kmem_cache_t *zfs_ds_collector_cache;
65
66 /*
67 * Tunable to override the maximum number of threads for the parallelization
68 * of dmu_objset_find_dp, needed to speed up the import of pools with many
69 * datasets.
70 * Default is 4 times the number of leaf vdevs.
71 */
72 int dmu_find_threads = 0;
73
74 /*
75 * Backfill lower metadnode objects after this many have been freed.
76 * Backfilling negatively impacts object creation rates, so only do it
77 * if there are enough holes to fill.
78 */
79 int dmu_rescan_dnode_threshold = 131072;
80
81 static void dmu_objset_find_dp_cb(void *arg);
82
83 /* ARGSUSED */
84 static int
85 zfs_ds_collector_constructor(void *ds_el, void *unused, int flags)
86 {
87 bzero(ds_el, sizeof (zfs_ds_collector_entry_t));
88 return (0);
89 }
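/*
 * kmem cache constructors run when an object is first built for the
 * cache, not on every kmem_cache_alloc(), so the bzero() above only
 * guarantees a zeroed entry on first use; recycled entries come back
 * in whatever state they were freed in.
 */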
90
91 void
92 dmu_objset_init(void)
93 {
94 zfs_ds_collector_cache = kmem_cache_create("zfs_ds_collector_cache",
95 sizeof (zfs_ds_collector_entry_t),
96 8, zfs_ds_collector_constructor,
97 NULL, NULL, NULL, NULL, 0);
98 rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
99 }
100
101 void
102 dmu_objset_fini(void)
103 {
104 rw_destroy(&os_lock);
105 kmem_cache_destroy(zfs_ds_collector_cache);
106 }
107
108 spa_t *
109 dmu_objset_spa(objset_t *os)
110 {
111 return (os->os_spa);
112 }
113
114 zilog_t *
115 dmu_objset_zil(objset_t *os)
116 {
117 return (os->os_zil);
118 }
119
120 dsl_pool_t *
121 dmu_objset_pool(objset_t *os)
122 {
123 dsl_dataset_t *ds;
124
125 if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
177 ASSERT(newval != ZIO_CHECKSUM_INHERIT);
178
179 os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
180 }
181
182 static void
183 compression_changed_cb(void *arg, uint64_t newval)
184 {
185 objset_t *os = arg;
186
187 /*
188 * Inheritance and range checking should have been done by now.
189 */
190 ASSERT(newval != ZIO_COMPRESS_INHERIT);
191
192 os->os_compress = zio_compress_select(os->os_spa, newval,
193 ZIO_COMPRESS_ON);
194 }
195
196 static void
197 smartcomp_changed_cb(void *arg, uint64_t newval)
198 {
199 objset_t *os = arg;
200
201 os->os_smartcomp_enabled = newval ? B_TRUE : B_FALSE;
202 }
203
204 static void
205 copies_changed_cb(void *arg, uint64_t newval)
206 {
207 objset_t *os = arg;
208
209 /*
210 * Inheritance and range checking should have been done by now.
211 */
212 ASSERT(newval > 0);
213 ASSERT(newval <= spa_max_replication(os->os_spa));
214
215 os->os_copies = newval;
216 }
217
218 static void
219 dedup_changed_cb(void *arg, uint64_t newval)
220 {
221 objset_t *os = arg;
222 spa_t *spa = os->os_spa;
223 enum zio_checksum checksum;
224
239 objset_t *os = arg;
240
241 /*
242 * Inheritance and range checking should have been done by now.
243 */
244 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
245 newval == ZFS_CACHE_METADATA);
246
247 os->os_primary_cache = newval;
248 }
249
250 static void
251 secondary_cache_changed_cb(void *arg, uint64_t newval)
252 {
253 objset_t *os = arg;
254
255 /*
256 * Inheritance and range checking should have been done by now.
257 */
258 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
259 newval == ZFS_CACHE_METADATA || newval == ZFS_CACHE_DATA);
260
261 os->os_secondary_cache = newval;
262 }
263
264 static void
265 zpl_meta_placement_changed_cb(void *arg, uint64_t newval)
266 {
267 objset_t *os = arg;
268
269 os->os_zpl_meta_to_special = newval;
270 }
271
272 static void
273 sync_changed_cb(void *arg, uint64_t newval)
274 {
275 objset_t *os = arg;
276
277 /*
278 * Inheritance and range checking should have been done by now.
279 */
280 ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
281 newval == ZFS_SYNC_DISABLED);
282
283 os->os_sync = newval;
284 if (os->os_zil)
285 zil_set_sync(os->os_zil, newval);
286 }
287
288 static void
289 redundant_metadata_changed_cb(void *arg, uint64_t newval)
290 {
291 objset_t *os = arg;
292
363 dnode_multilist_index_func(multilist_t *ml, void *obj)
364 {
365 dnode_t *dn = obj;
366 return (dnode_hash(dn->dn_objset, dn->dn_object) %
367 multilist_get_num_sublists(ml));
368 }
369
370 /*
371 * Instantiates the objset_t in-memory structure corresponding to the
372 * objset_phys_t that's pointed to by the specified blkptr_t.
373 */
374 int
375 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
376 objset_t **osp)
377 {
378 objset_t *os;
379 int i, err;
380
381 ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
382
383 os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
384 os->os_dsl_dataset = ds;
385 os->os_spa = spa;
386 os->os_rootbp = bp;
387 if (!BP_IS_HOLE(os->os_rootbp)) {
388 arc_flags_t aflags = ARC_FLAG_WAIT;
389 zbookmark_phys_t zb;
390 SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
391 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
392
393 if (DMU_OS_IS_L2CACHEABLE(os))
394 aflags |= ARC_FLAG_L2CACHE;
395
396 dprintf_bp(os->os_rootbp, "reading %s", "");
397 err = arc_read(NULL, spa, os->os_rootbp,
398 arc_getbuf_func, &os->os_phys_buf,
399 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
400 if (err != 0) {
401 kmem_free(os, sizeof (objset_t));
402 /* convert checksum errors into IO errors */
437 if (ds != NULL) {
438 boolean_t needlock = B_FALSE;
439
440 /*
441 * Note: it's valid to open the objset if the dataset is
442 * long-held, in which case the pool_config lock will not
443 * be held.
444 */
445 if (!dsl_pool_config_held(dmu_objset_pool(os))) {
446 needlock = B_TRUE;
447 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
448 }
449 err = dsl_prop_register(ds,
450 zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
451 primary_cache_changed_cb, os);
452 if (err == 0) {
453 err = dsl_prop_register(ds,
454 zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
455 secondary_cache_changed_cb, os);
456 }
457 if (err == 0) {
458 err = dsl_prop_register(ds,
459 zfs_prop_to_name(ZFS_PROP_ZPL_META_TO_METADEV),
460 zpl_meta_placement_changed_cb, os);
461 }
462 if (!ds->ds_is_snapshot) {
463 if (err == 0) {
464 err = dsl_prop_register(ds,
465 zfs_prop_to_name(ZFS_PROP_CHECKSUM),
466 checksum_changed_cb, os);
467 }
468 if (err == 0) {
469 err = dsl_prop_register(ds,
470 zfs_prop_to_name(ZFS_PROP_COMPRESSION),
471 compression_changed_cb, os);
472 }
473 if (err == 0) {
474 err = dsl_prop_register(ds,
475 zfs_prop_to_name(ZFS_PROP_SMARTCOMPRESSION),
476 smartcomp_changed_cb, os);
477 }
478 if (err == 0) {
479 err = dsl_prop_register(ds,
480 zfs_prop_to_name(ZFS_PROP_COPIES),
481 copies_changed_cb, os);
482 }
483 if (err == 0) {
484 err = dsl_prop_register(ds,
485 zfs_prop_to_name(ZFS_PROP_DEDUP),
486 dedup_changed_cb, os);
487 }
488 if (err == 0) {
489 err = dsl_prop_register(ds,
490 zfs_prop_to_name(ZFS_PROP_LOGBIAS),
491 logbias_changed_cb, os);
492 }
493 if (err == 0) {
494 err = dsl_prop_register(ds,
495 zfs_prop_to_name(ZFS_PROP_SYNC),
496 sync_changed_cb, os);
497 }
498 if (err == 0) {
499 err = dsl_prop_register(ds,
500 zfs_prop_to_name(
501 ZFS_PROP_REDUNDANT_METADATA),
502 redundant_metadata_changed_cb, os);
503 }
504 if (err == 0) {
505 err = dsl_prop_register(ds,
506 zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
507 recordsize_changed_cb, os);
508 }
509 if (err == 0) {
510 err = dsl_prop_register(ds,
511 zfs_prop_to_name(ZFS_PROP_WBC_MODE),
512 wbc_mode_changed, os);
513 }
514 }
515 if (needlock)
516 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
517 if (err != 0) {
518 arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
519 kmem_free(os, sizeof (objset_t));
520 return (err);
521 }
522 } else {
523 /* It's the meta-objset. */
524 os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
525 os->os_compress = ZIO_COMPRESS_ON;
526 os->os_copies = spa_max_replication(spa);
527 os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
528 os->os_dedup_verify = B_FALSE;
529 os->os_logbias = ZFS_LOGBIAS_LATENCY;
530 os->os_sync = ZFS_SYNC_STANDARD;
531 os->os_primary_cache = ZFS_CACHE_ALL;
532 os->os_secondary_cache = ZFS_CACHE_ALL;
533 os->os_zpl_meta_to_special = 0;
534 }
535 /*
536 * These properties will be filled in by the logic in zfs_get_zplprop()
537 * when they are queried for the first time.
538 */
539 os->os_version = OBJSET_PROP_UNINITIALIZED;
540 os->os_normalization = OBJSET_PROP_UNINITIALIZED;
541 os->os_utf8only = OBJSET_PROP_UNINITIALIZED;
542 os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED;
543
544 if (ds == NULL || !ds->ds_is_snapshot)
545 os->os_zil_header = os->os_phys->os_zil_header;
546 os->os_zil = zil_alloc(os, &os->os_zil_header);
547
548 for (i = 0; i < TXG_SIZE; i++) {
549 os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
550 offsetof(dnode_t, dn_dirty_link[i]),
551 dnode_multilist_index_func);
552 }
553 list_create(&os->os_dnodes, sizeof (dnode_t),
554 offsetof(dnode_t, dn_link));
555 list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
556 offsetof(dmu_buf_impl_t, db_link));
557
558 mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
559 mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
560 mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
561 mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
562
741 }
742
743 void
744 dmu_objset_evict_dbufs(objset_t *os)
745 {
746 dnode_t dn_marker;
747 dnode_t *dn;
748
749 mutex_enter(&os->os_lock);
750 dn = list_head(&os->os_dnodes);
751 while (dn != NULL) {
752 /*
753 * Skip dnodes without holds. We have to do this dance
754 * because dnode_add_ref() only works if there is already a
755 * hold. If the dnode has no holds, then it has no dbufs.
756 */
757 if (dnode_add_ref(dn, FTAG)) {
758 list_insert_after(&os->os_dnodes, dn, &dn_marker);
759 mutex_exit(&os->os_lock);
760
761 dnode_evict_dbufs(dn, DBUF_EVICT_ALL);
762 dnode_rele(dn, FTAG);
763
764 mutex_enter(&os->os_lock);
765 dn = list_next(&os->os_dnodes, &dn_marker);
766 list_remove(&os->os_dnodes, &dn_marker);
767 } else {
768 dn = list_next(&os->os_dnodes, dn);
769 }
770 }
771 mutex_exit(&os->os_lock);
772
773 if (DMU_USERUSED_DNODE(os) != NULL) {
774 dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os), DBUF_EVICT_ALL);
775 dnode_evict_dbufs(DMU_USERUSED_DNODE(os), DBUF_EVICT_ALL);
776 }
777 dnode_evict_dbufs(DMU_META_DNODE(os), DBUF_EVICT_ALL);
778 }
779
780 /*
781 * Objset eviction processing is split into two pieces.
782 * The first marks the objset as evicting, evicts any dbufs that
783 * have a refcount of zero, and then queues up the objset for the
784 * second phase of eviction. Once os->os_dnodes has been cleared by
785 * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
786 * The second phase closes the special dnodes, dequeues the objset from
787 * the list of those undergoing eviction, and finally frees the objset.
788 *
789 * NOTE: Due to asynchronous eviction processing (invocation of
790 * dnode_buf_pageout()), it is possible for the meta dnode for the
791 * objset to have no holds even though os->os_dnodes is not empty.
792 */
793 void
794 dmu_objset_evict(objset_t *os)
795 {
796 dsl_dataset_t *ds = os->os_dsl_dataset;
797
1091 "origin=%s (%llu)", namebuf, origin->ds_object);
1092 dsl_dataset_rele(ds, FTAG);
1093 dsl_dataset_rele(origin, FTAG);
1094 dsl_dir_rele(pdd, FTAG);
1095 }
1096
1097 int
1098 dmu_objset_clone(const char *clone, const char *origin)
1099 {
1100 dmu_objset_clone_arg_t doca;
1101
1102 doca.doca_clone = clone;
1103 doca.doca_origin = origin;
1104 doca.doca_cred = CRED();
1105
1106 return (dsl_sync_task(clone,
1107 dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
1108 5, ZFS_SPACE_CHECK_NORMAL));
1109 }
1110
1111 int
1112 dmu_objset_snapshot_one(const char *fsname, const char *snapname)
1113 {
1114 int err;
1115 char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
1116 nvlist_t *snaps = fnvlist_alloc();
1117
1118 fnvlist_add_boolean(snaps, longsnap);
1119 strfree(longsnap);
1120 err = dsl_dataset_snapshot(snaps, NULL, NULL);
1121 fnvlist_free(snaps);
1122 return (err);
1123 }
1124
1125 static void
1126 dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
1127 {
1128 dnode_t *dn;
1129
1130 while ((dn = multilist_sublist_head(list)) != NULL) {
1131 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1245 * This is the MOS. If we have upgraded,
1246 * spa_max_replication() could change, so reset
1247 * os_copies here.
1248 */
1249 os->os_copies = spa_max_replication(os->os_spa);
1250 }
1251
1252 /*
1253 * Create the root block IO
1254 */
1255 SET_BOOKMARK(&zb, os->os_dsl_dataset ?
1256 os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
1257 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1258 arc_release(os->os_phys_buf, &os->os_phys_buf);
1259
1260 dmu_write_policy(os, NULL, 0, 0, &zp);
1261
1262 zio = arc_write(pio, os->os_spa, tx->tx_txg,
1263 blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
1264 &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
1265 os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb, NULL);
1266
1267 /*
1268 * Sync special dnodes - the parent IO for the sync is the root block
1269 */
1270 DMU_META_DNODE(os)->dn_zio = zio;
1271 dnode_sync(DMU_META_DNODE(os), tx);
1272
1273 os->os_phys->os_flags = os->os_flags;
1274
1275 if (DMU_USERUSED_DNODE(os) &&
1276 DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1277 DMU_USERUSED_DNODE(os)->dn_zio = zio;
1278 dnode_sync(DMU_USERUSED_DNODE(os), tx);
1279 DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
1280 dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
1281 }
1282
1283 txgoff = tx->tx_txg & TXG_MASK;
1284
1285 if (dmu_objset_userused_enabled(os)) {
1785 return (os->os_dsl_dataset->ds_is_snapshot);
1786 else
1787 return (B_FALSE);
1788 }
1789
1790 int
1791 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
1792 boolean_t *conflict)
1793 {
1794 dsl_dataset_t *ds = os->os_dsl_dataset;
1795 uint64_t ignored;
1796
1797 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1798 return (SET_ERROR(ENOENT));
1799
1800 return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
1801 dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
1802 MT_NORMALIZE, real, maxlen, conflict));
1803 }
1804
1805 int
1806 dmu_clone_list_next(objset_t *os, int len, char *name,
1807 uint64_t *idp, uint64_t *offp)
1808 {
1809 dsl_dataset_t *ds = os->os_dsl_dataset, *clone;
1810 zap_cursor_t cursor;
1811 zap_attribute_t attr;
1812 char buf[MAXNAMELEN];
1813
1814 ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
1815
1816 if (dsl_dataset_phys(ds)->ds_next_clones_obj == 0)
1817 return (SET_ERROR(ENOENT));
1818
1819 zap_cursor_init_serialized(&cursor,
1820 ds->ds_dir->dd_pool->dp_meta_objset,
1821 dsl_dataset_phys(ds)->ds_next_clones_obj, *offp);
1822
1823 if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1824 zap_cursor_fini(&cursor);
1825 return (SET_ERROR(ENOENT));
1826 }
1827
1828 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
1829 attr.za_first_integer, FTAG, &clone));
1830
1831 dsl_dir_name(clone->ds_dir, buf);
1832
1833 dsl_dataset_rele(clone, FTAG);
1834
1835 if (strlen(buf) >= len) {
1836 zap_cursor_fini(&cursor);
1837 return (SET_ERROR(ENAMETOOLONG));
1838 }
1839
1840 (void) strcpy(name, buf);
1841 if (idp != NULL)
1842 *idp = attr.za_first_integer;
1843
1844 zap_cursor_advance(&cursor);
1845 *offp = zap_cursor_serialize(&cursor);
1846 zap_cursor_fini(&cursor);
1847
1848 return (0);
1849 }
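/*
 * A usage sketch (caller-side; use_clone_name() is hypothetical): the
 * serialized cursor cookie in *offp lets a caller walk the clone list
 * across calls without keeping a cursor open:
 *
 *	uint64_t off = 0;
 *	char name[MAXNAMELEN];
 *
 *	while (dmu_clone_list_next(os, sizeof (name), name,
 *	    NULL, &off) == 0)
 *		use_clone_name(name);
 */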
1850
1851 int
1852 dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
1853 uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
1854 {
1855 dsl_dataset_t *ds = os->os_dsl_dataset;
1856 zap_cursor_t cursor;
1857 zap_attribute_t attr;
1858
1859 ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
1860
1861 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1862 return (SET_ERROR(ENOENT));
1863
1864 zap_cursor_init_serialized(&cursor,
1865 ds->ds_dir->dd_pool->dp_meta_objset,
1866 dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp);
1867
1868 if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1869 zap_cursor_fini(&cursor);
1870 return (SET_ERROR(ENOENT));