Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ new/usr/src/uts/common/fs/zfs/dmu_objset.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25 25 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
26 + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
26 27 */
27 28
28 29 /* Portions Copyright 2010 Robert Milkowski */
29 30
30 31 #include <sys/cred.h>
31 32 #include <sys/zfs_context.h>
32 33 #include <sys/dmu_objset.h>
33 34 #include <sys/dsl_dir.h>
34 35 #include <sys/dsl_dataset.h>
35 36 #include <sys/dsl_prop.h>
36 37 #include <sys/dsl_pool.h>
37 38 #include <sys/dsl_synctask.h>
38 39 #include <sys/dsl_deleg.h>
39 40 #include <sys/dnode.h>
40 41 #include <sys/dbuf.h>
41 42 #include <sys/zvol.h>
42 43 #include <sys/dmu_tx.h>
43 44 #include <sys/zap.h>
44 45 #include <sys/zil.h>
45 46 #include <sys/dmu_impl.h>
46 47 #include <sys/zfs_ioctl.h>
47 48 #include <sys/sa.h>
48 49 #include <sys/zfs_onexit.h>
49 50 #include <sys/dsl_destroy.h>
50 51
51 52 /*
52 53 * Needed to close a window in dnode_move() that allows the objset to be freed
53 54 * before it can be safely accessed.
54 55 */
55 56 krwlock_t os_lock;
56 57
57 58 void
58 59 dmu_objset_init(void)
59 60 {
60 61 rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
61 62 }
62 63
63 64 void
64 65 dmu_objset_fini(void)
65 66 {
66 67 rw_destroy(&os_lock);
67 68 }
68 69
69 70 spa_t *
70 71 dmu_objset_spa(objset_t *os)
71 72 {
72 73 return (os->os_spa);
73 74 }
74 75
75 76 zilog_t *
76 77 dmu_objset_zil(objset_t *os)
77 78 {
78 79 return (os->os_zil);
79 80 }
80 81
81 82 dsl_pool_t *
82 83 dmu_objset_pool(objset_t *os)
83 84 {
84 85 dsl_dataset_t *ds;
85 86
86 87 if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
87 88 return (ds->ds_dir->dd_pool);
88 89 else
89 90 return (spa_get_dsl(os->os_spa));
90 91 }
91 92
92 93 dsl_dataset_t *
93 94 dmu_objset_ds(objset_t *os)
94 95 {
95 96 return (os->os_dsl_dataset);
96 97 }
97 98
98 99 dmu_objset_type_t
99 100 dmu_objset_type(objset_t *os)
100 101 {
101 102 return (os->os_phys->os_type);
102 103 }
103 104
104 105 void
105 106 dmu_objset_name(objset_t *os, char *buf)
106 107 {
107 108 dsl_dataset_name(os->os_dsl_dataset, buf);
108 109 }
109 110
110 111 uint64_t
111 112 dmu_objset_id(objset_t *os)
112 113 {
113 114 dsl_dataset_t *ds = os->os_dsl_dataset;
114 115
115 116 return (ds ? ds->ds_object : 0);
116 117 }
117 118
118 119 zfs_sync_type_t
119 120 dmu_objset_syncprop(objset_t *os)
120 121 {
121 122 return (os->os_sync);
122 123 }
123 124
124 125 zfs_logbias_op_t
125 126 dmu_objset_logbias(objset_t *os)
126 127 {
127 128 return (os->os_logbias);
128 129 }
129 130
130 131 static void
131 132 checksum_changed_cb(void *arg, uint64_t newval)
132 133 {
133 134 objset_t *os = arg;
134 135
135 136 /*
136 137 * Inheritance should have been done by now.
137 138 */
138 139 ASSERT(newval != ZIO_CHECKSUM_INHERIT);
139 140
140 141 os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
141 142 }
142 143
143 144 static void
144 145 compression_changed_cb(void *arg, uint64_t newval)
145 146 {
146 147 objset_t *os = arg;
147 148
148 149 /*
149 150 * Inheritance and range checking should have been done by now.
150 151 */
151 152 ASSERT(newval != ZIO_COMPRESS_INHERIT);
152 153
153 154 os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
154 155 }
155 156
156 157 static void
157 158 copies_changed_cb(void *arg, uint64_t newval)
158 159 {
159 160 objset_t *os = arg;
160 161
161 162 /*
162 163 * Inheritance and range checking should have been done by now.
163 164 */
164 165 ASSERT(newval > 0);
165 166 ASSERT(newval <= spa_max_replication(os->os_spa));
166 167
167 168 os->os_copies = newval;
168 169 }
169 170
170 171 static void
171 172 dedup_changed_cb(void *arg, uint64_t newval)
172 173 {
173 174 objset_t *os = arg;
174 175 spa_t *spa = os->os_spa;
175 176 enum zio_checksum checksum;
176 177
177 178 /*
178 179 * Inheritance should have been done by now.
179 180 */
180 181 ASSERT(newval != ZIO_CHECKSUM_INHERIT);
181 182
182 183 checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);
183 184
184 185 os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
185 186 os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
186 187 }
187 188
188 189 static void
189 190 primary_cache_changed_cb(void *arg, uint64_t newval)
190 191 {
191 192 objset_t *os = arg;
192 193
193 194 /*
194 195 * Inheritance and range checking should have been done by now.
195 196 */
196 197 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
197 198 newval == ZFS_CACHE_METADATA);
198 199
199 200 os->os_primary_cache = newval;
200 201 }
201 202
202 203 static void
203 204 secondary_cache_changed_cb(void *arg, uint64_t newval)
204 205 {
205 206 objset_t *os = arg;
206 207
207 208 /*
208 209 * Inheritance and range checking should have been done by now.
209 210 */
210 211 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
211 212 newval == ZFS_CACHE_METADATA);
212 213
213 214 os->os_secondary_cache = newval;
214 215 }
215 216
216 217 static void
217 218 sync_changed_cb(void *arg, uint64_t newval)
218 219 {
219 220 objset_t *os = arg;
220 221
221 222 /*
222 223 * Inheritance and range checking should have been done by now.
223 224 */
224 225 ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
225 226 newval == ZFS_SYNC_DISABLED);
226 227
227 228 os->os_sync = newval;
228 229 if (os->os_zil)
229 230 zil_set_sync(os->os_zil, newval);
230 231 }
231 232
232 233 static void
233 234 redundant_metadata_changed_cb(void *arg, uint64_t newval)
234 235 {
235 236 objset_t *os = arg;
236 237
237 238 /*
238 239 * Inheritance and range checking should have been done by now.
239 240 */
240 241 ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
241 242 newval == ZFS_REDUNDANT_METADATA_MOST);
242 243
243 244 os->os_redundant_metadata = newval;
244 245 }
245 246
246 247 static void
247 248 logbias_changed_cb(void *arg, uint64_t newval)
248 249 {
249 250 objset_t *os = arg;
250 251
251 252 ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
252 253 newval == ZFS_LOGBIAS_THROUGHPUT);
253 254 os->os_logbias = newval;
254 255 if (os->os_zil)
255 256 zil_set_logbias(os->os_zil, newval);
256 257 }
257 258
258 259 static void
259 260 recordsize_changed_cb(void *arg, uint64_t newval)
260 261 {
261 262 objset_t *os = arg;
262 263
263 264 os->os_recordsize = newval;
264 265 }
265 266
266 267 void
267 268 dmu_objset_byteswap(void *buf, size_t size)
268 269 {
269 270 objset_phys_t *osp = buf;
270 271
271 272 ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
272 273 dnode_byteswap(&osp->os_meta_dnode);
273 274 byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
274 275 osp->os_type = BSWAP_64(osp->os_type);
275 276 osp->os_flags = BSWAP_64(osp->os_flags);
276 277 if (size == sizeof (objset_phys_t)) {
277 278 dnode_byteswap(&osp->os_userused_dnode);
278 279 dnode_byteswap(&osp->os_groupused_dnode);
279 280 }
280 281 }
281 282
282 283 int
283 284 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
284 285 objset_t **osp)
285 286 {
286 287 objset_t *os;
287 288 int i, err;
288 289
289 290 ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
290 291
291 292 os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
292 293 os->os_dsl_dataset = ds;
293 294 os->os_spa = spa;
294 295 os->os_rootbp = bp;
295 296 if (!BP_IS_HOLE(os->os_rootbp)) {
296 297 arc_flags_t aflags = ARC_FLAG_WAIT;
297 298 zbookmark_phys_t zb;
298 299 SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
299 300 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
300 301
301 302 if (DMU_OS_IS_L2CACHEABLE(os))
302 303 aflags |= ARC_FLAG_L2CACHE;
303 304 if (DMU_OS_IS_L2COMPRESSIBLE(os))
304 305 aflags |= ARC_FLAG_L2COMPRESS;
305 306
306 307 dprintf_bp(os->os_rootbp, "reading %s", "");
307 308 err = arc_read(NULL, spa, os->os_rootbp,
308 309 arc_getbuf_func, &os->os_phys_buf,
309 310 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
310 311 if (err != 0) {
311 312 kmem_free(os, sizeof (objset_t));
312 313 /* convert checksum errors into IO errors */
313 314 if (err == ECKSUM)
314 315 err = SET_ERROR(EIO);
315 316 return (err);
316 317 }
317 318
318 319 /* Increase the blocksize if we are permitted. */
319 320 if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
320 321 arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
321 322 arc_buf_t *buf = arc_buf_alloc(spa,
322 323 sizeof (objset_phys_t), &os->os_phys_buf,
323 324 ARC_BUFC_METADATA);
324 325 bzero(buf->b_data, sizeof (objset_phys_t));
325 326 bcopy(os->os_phys_buf->b_data, buf->b_data,
326 327 arc_buf_size(os->os_phys_buf));
327 328 (void) arc_buf_remove_ref(os->os_phys_buf,
328 329 &os->os_phys_buf);
329 330 os->os_phys_buf = buf;
330 331 }
331 332
332 333 os->os_phys = os->os_phys_buf->b_data;
333 334 os->os_flags = os->os_phys->os_flags;
334 335 } else {
335 336 int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
336 337 sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
337 338 os->os_phys_buf = arc_buf_alloc(spa, size,
338 339 &os->os_phys_buf, ARC_BUFC_METADATA);
339 340 os->os_phys = os->os_phys_buf->b_data;
340 341 bzero(os->os_phys, size);
341 342 }
342 343
343 344 /*
344 345 * Note: the changed_cb will be called once before the register
345 346 * func returns, thus changing the checksum/compression from the
346 347 * default (fletcher2/off). Snapshots don't need to know about
347 348 * checksum/compression/copies.
|
↓ open down ↓ |
312 lines elided |
↑ open up ↑ |
348 349 */
349 350 if (ds != NULL) {
350 351 err = dsl_prop_register(ds,
351 352 zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
352 353 primary_cache_changed_cb, os);
353 354 if (err == 0) {
354 355 err = dsl_prop_register(ds,
355 356 zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
356 357 secondary_cache_changed_cb, os);
357 358 }
358 - if (!dsl_dataset_is_snapshot(ds)) {
359 + if (!ds->ds_is_snapshot) {
359 360 if (err == 0) {
360 361 err = dsl_prop_register(ds,
361 362 zfs_prop_to_name(ZFS_PROP_CHECKSUM),
362 363 checksum_changed_cb, os);
363 364 }
364 365 if (err == 0) {
365 366 err = dsl_prop_register(ds,
366 367 zfs_prop_to_name(ZFS_PROP_COMPRESSION),
367 368 compression_changed_cb, os);
368 369 }
369 370 if (err == 0) {
370 371 err = dsl_prop_register(ds,
371 372 zfs_prop_to_name(ZFS_PROP_COPIES),
372 373 copies_changed_cb, os);
373 374 }
374 375 if (err == 0) {
375 376 err = dsl_prop_register(ds,
376 377 zfs_prop_to_name(ZFS_PROP_DEDUP),
377 378 dedup_changed_cb, os);
378 379 }
379 380 if (err == 0) {
380 381 err = dsl_prop_register(ds,
381 382 zfs_prop_to_name(ZFS_PROP_LOGBIAS),
382 383 logbias_changed_cb, os);
383 384 }
384 385 if (err == 0) {
385 386 err = dsl_prop_register(ds,
386 387 zfs_prop_to_name(ZFS_PROP_SYNC),
387 388 sync_changed_cb, os);
388 389 }
389 390 if (err == 0) {
390 391 err = dsl_prop_register(ds,
391 392 zfs_prop_to_name(
392 393 ZFS_PROP_REDUNDANT_METADATA),
393 394 redundant_metadata_changed_cb, os);
394 395 }
395 396 if (err == 0) {
396 397 err = dsl_prop_register(ds,
397 398 zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
398 399 recordsize_changed_cb, os);
399 400 }
400 401 }
401 402 if (err != 0) {
402 403 VERIFY(arc_buf_remove_ref(os->os_phys_buf,
403 404 &os->os_phys_buf));
404 405 kmem_free(os, sizeof (objset_t));
405 406 return (err);
406 407 }
407 408 } else {
408 409 /* It's the meta-objset. */
409 410 os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
|
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
410 411 os->os_compress = ZIO_COMPRESS_LZJB;
411 412 os->os_copies = spa_max_replication(spa);
412 413 os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
413 414 os->os_dedup_verify = B_FALSE;
414 415 os->os_logbias = ZFS_LOGBIAS_LATENCY;
415 416 os->os_sync = ZFS_SYNC_STANDARD;
416 417 os->os_primary_cache = ZFS_CACHE_ALL;
417 418 os->os_secondary_cache = ZFS_CACHE_ALL;
418 419 }
419 420
420 - if (ds == NULL || !dsl_dataset_is_snapshot(ds))
421 + if (ds == NULL || !ds->ds_is_snapshot)
421 422 os->os_zil_header = os->os_phys->os_zil_header;
422 423 os->os_zil = zil_alloc(os, &os->os_zil_header);
423 424
424 425 for (i = 0; i < TXG_SIZE; i++) {
425 426 list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
426 427 offsetof(dnode_t, dn_dirty_link[i]));
427 428 list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
428 429 offsetof(dnode_t, dn_dirty_link[i]));
429 430 }
430 431 list_create(&os->os_dnodes, sizeof (dnode_t),
431 432 offsetof(dnode_t, dn_link));
432 433 list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
433 434 offsetof(dmu_buf_impl_t, db_link));
434 435
435 436 mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
436 437 mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
437 438 mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
438 439
439 - DMU_META_DNODE(os) = dnode_special_open(os,
440 - &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
441 - &os->os_meta_dnode);
440 + dnode_special_open(os, &os->os_phys->os_meta_dnode,
441 + DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
442 442 if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
443 - DMU_USERUSED_DNODE(os) = dnode_special_open(os,
444 - &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
445 - &os->os_userused_dnode);
446 - DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
447 - &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
448 - &os->os_groupused_dnode);
443 + dnode_special_open(os, &os->os_phys->os_userused_dnode,
444 + DMU_USERUSED_OBJECT, &os->os_userused_dnode);
445 + dnode_special_open(os, &os->os_phys->os_groupused_dnode,
446 + DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
449 447 }
450 448
451 449 *osp = os;
452 450 return (0);
453 451 }
454 452
455 453 int
456 454 dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
457 455 {
458 456 int err = 0;
459 457
460 458 mutex_enter(&ds->ds_opening_lock);
461 459 if (ds->ds_objset == NULL) {
462 460 objset_t *os;
463 461 err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
464 462 ds, dsl_dataset_get_blkptr(ds), &os);
465 463
466 464 if (err == 0) {
467 465 mutex_enter(&ds->ds_lock);
468 466 ASSERT(ds->ds_objset == NULL);
469 467 ds->ds_objset = os;
470 468 mutex_exit(&ds->ds_lock);
471 469 }
472 470 }
473 471 *osp = ds->ds_objset;
474 472 mutex_exit(&ds->ds_opening_lock);
475 473 return (err);
476 474 }
477 475
478 476 /*
479 477 * Holds the pool while the objset is held. Therefore only one objset
480 478 * can be held at a time.
481 479 */
482 480 int
483 481 dmu_objset_hold(const char *name, void *tag, objset_t **osp)
484 482 {
485 483 dsl_pool_t *dp;
486 484 dsl_dataset_t *ds;
487 485 int err;
488 486
489 487 err = dsl_pool_hold(name, tag, &dp);
490 488 if (err != 0)
491 489 return (err);
492 490 err = dsl_dataset_hold(dp, name, tag, &ds);
493 491 if (err != 0) {
494 492 dsl_pool_rele(dp, tag);
495 493 return (err);
496 494 }
497 495
498 496 err = dmu_objset_from_ds(ds, osp);
499 497 if (err != 0) {
500 498 dsl_dataset_rele(ds, tag);
501 499 dsl_pool_rele(dp, tag);
502 500 }
503 501
504 502 return (err);
505 503 }
506 504
507 505 /*
508 506 * dsl_pool must not be held when this is called.
509 507 * Upon successful return, there will be a longhold on the dataset,
510 508 * and the dsl_pool will not be held.
511 509 */
512 510 int
513 511 dmu_objset_own(const char *name, dmu_objset_type_t type,
514 512 boolean_t readonly, void *tag, objset_t **osp)
515 513 {
516 514 dsl_pool_t *dp;
517 515 dsl_dataset_t *ds;
518 516 int err;
519 517
520 518 err = dsl_pool_hold(name, FTAG, &dp);
521 519 if (err != 0)
522 520 return (err);
523 521 err = dsl_dataset_own(dp, name, tag, &ds);
524 522 if (err != 0) {
525 523 dsl_pool_rele(dp, FTAG);
|
↓ open down ↓ |
67 lines elided |
↑ open up ↑ |
526 524 return (err);
527 525 }
528 526
529 527 err = dmu_objset_from_ds(ds, osp);
530 528 dsl_pool_rele(dp, FTAG);
531 529 if (err != 0) {
532 530 dsl_dataset_disown(ds, tag);
533 531 } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
534 532 dsl_dataset_disown(ds, tag);
535 533 return (SET_ERROR(EINVAL));
536 - } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
534 + } else if (!readonly && ds->ds_is_snapshot) {
537 535 dsl_dataset_disown(ds, tag);
538 536 return (SET_ERROR(EROFS));
539 537 }
540 538 return (err);
541 539 }
542 540
543 541 void
544 542 dmu_objset_rele(objset_t *os, void *tag)
545 543 {
546 544 dsl_pool_t *dp = dmu_objset_pool(os);
547 545 dsl_dataset_rele(os->os_dsl_dataset, tag);
548 546 dsl_pool_rele(dp, tag);
549 547 }
550 548
551 549 /*
552 550 * When we are called, os MUST refer to an objset associated with a dataset
553 551 * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
554 552 * == tag. We will then release and reacquire ownership of the dataset while
555 553 * holding the pool config_rwlock to avoid intervening namespace or ownership
 556 554  * changes that may occur.
557 555 *
558 556 * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
559 557 * release the hold on its dataset and acquire a new one on the dataset of the
560 558 * same name so that it can be partially torn down and reconstructed.
561 559 */
562 560 void
563 561 dmu_objset_refresh_ownership(objset_t *os, void *tag)
564 562 {
565 563 dsl_pool_t *dp;
566 564 dsl_dataset_t *ds, *newds;
567 565 char name[MAXNAMELEN];
568 566
569 567 ds = os->os_dsl_dataset;
570 568 VERIFY3P(ds, !=, NULL);
571 569 VERIFY3P(ds->ds_owner, ==, tag);
572 570 VERIFY(dsl_dataset_long_held(ds));
573 571
574 572 dsl_dataset_name(ds, name);
575 573 dp = dmu_objset_pool(os);
576 574 dsl_pool_config_enter(dp, FTAG);
577 575 dmu_objset_disown(os, tag);
578 576 VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
579 577 VERIFY3P(newds, ==, os->os_dsl_dataset);
580 578 dsl_pool_config_exit(dp, FTAG);
581 579 }
|
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
582 580
583 581 void
584 582 dmu_objset_disown(objset_t *os, void *tag)
585 583 {
586 584 dsl_dataset_disown(os->os_dsl_dataset, tag);
587 585 }
588 586
589 587 void
590 588 dmu_objset_evict_dbufs(objset_t *os)
591 589 {
590 + dnode_t dn_marker;
592 591 dnode_t *dn;
593 592
594 593 mutex_enter(&os->os_lock);
594 + dn = list_head(&os->os_dnodes);
595 + while (dn != NULL) {
596 + /*
597 + * Skip dnodes without holds. We have to do this dance
598 + * because dnode_add_ref() only works if there is already a
599 + * hold. If the dnode has no holds, then it has no dbufs.
600 + */
601 + if (dnode_add_ref(dn, FTAG)) {
602 + list_insert_after(&os->os_dnodes, dn, &dn_marker);
603 + mutex_exit(&os->os_lock);
595 604
596 - /* process the mdn last, since the other dnodes have holds on it */
597 - list_remove(&os->os_dnodes, DMU_META_DNODE(os));
598 - list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
605 + dnode_evict_dbufs(dn);
606 + dnode_rele(dn, FTAG);
599 607
600 - /*
601 - * Find the first dnode with holds. We have to do this dance
602 - * because dnode_add_ref() only works if you already have a
603 - * hold. If there are no holds then it has no dbufs so OK to
604 - * skip.
605 - */
606 - for (dn = list_head(&os->os_dnodes);
607 - dn && !dnode_add_ref(dn, FTAG);
608 - dn = list_next(&os->os_dnodes, dn))
609 - continue;
610 -
611 - while (dn) {
612 - dnode_t *next_dn = dn;
613 -
614 - do {
615 - next_dn = list_next(&os->os_dnodes, next_dn);
616 - } while (next_dn && !dnode_add_ref(next_dn, FTAG));
617 -
618 - mutex_exit(&os->os_lock);
619 - dnode_evict_dbufs(dn);
620 - dnode_rele(dn, FTAG);
621 - mutex_enter(&os->os_lock);
622 - dn = next_dn;
608 + mutex_enter(&os->os_lock);
609 + dn = list_next(&os->os_dnodes, &dn_marker);
610 + list_remove(&os->os_dnodes, &dn_marker);
611 + } else {
612 + dn = list_next(&os->os_dnodes, dn);
613 + }
623 614 }
624 615 mutex_exit(&os->os_lock);
616 +
617 + if (DMU_USERUSED_DNODE(os) != NULL) {
618 + dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
619 + dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
620 + }
621 + dnode_evict_dbufs(DMU_META_DNODE(os));
625 622 }
626 623
624 +/*
 625 + * Objset eviction processing is split into two pieces.
626 + * The first marks the objset as evicting, evicts any dbufs that
627 + * have a refcount of zero, and then queues up the objset for the
628 + * second phase of eviction. Once os->os_dnodes has been cleared by
629 + * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
630 + * The second phase closes the special dnodes, dequeues the objset from
631 + * the list of those undergoing eviction, and finally frees the objset.
632 + *
633 + * NOTE: Due to asynchronous eviction processing (invocation of
634 + * dnode_buf_pageout()), it is possible for the meta dnode for the
635 + * objset to have no holds even though os->os_dnodes is not empty.
636 + */
627 637 void
628 638 dmu_objset_evict(objset_t *os)
629 639 {
630 640 dsl_dataset_t *ds = os->os_dsl_dataset;
631 641
632 642 for (int t = 0; t < TXG_SIZE; t++)
633 643 ASSERT(!dmu_objset_is_dirty(os, t));
634 644
635 645 if (ds) {
636 - if (!dsl_dataset_is_snapshot(ds)) {
646 + if (!ds->ds_is_snapshot) {
637 647 VERIFY0(dsl_prop_unregister(ds,
638 648 zfs_prop_to_name(ZFS_PROP_CHECKSUM),
639 649 checksum_changed_cb, os));
640 650 VERIFY0(dsl_prop_unregister(ds,
641 651 zfs_prop_to_name(ZFS_PROP_COMPRESSION),
642 652 compression_changed_cb, os));
643 653 VERIFY0(dsl_prop_unregister(ds,
644 654 zfs_prop_to_name(ZFS_PROP_COPIES),
645 655 copies_changed_cb, os));
646 656 VERIFY0(dsl_prop_unregister(ds,
647 657 zfs_prop_to_name(ZFS_PROP_DEDUP),
648 658 dedup_changed_cb, os));
649 659 VERIFY0(dsl_prop_unregister(ds,
650 660 zfs_prop_to_name(ZFS_PROP_LOGBIAS),
651 661 logbias_changed_cb, os));
652 662 VERIFY0(dsl_prop_unregister(ds,
653 663 zfs_prop_to_name(ZFS_PROP_SYNC),
654 664 sync_changed_cb, os));
655 665 VERIFY0(dsl_prop_unregister(ds,
656 666 zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
657 667 redundant_metadata_changed_cb, os));
658 668 VERIFY0(dsl_prop_unregister(ds,
659 669 zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
660 670 recordsize_changed_cb, os));
661 671 }
662 672 VERIFY0(dsl_prop_unregister(ds,
|
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
663 673 zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
664 674 primary_cache_changed_cb, os));
665 675 VERIFY0(dsl_prop_unregister(ds,
666 676 zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
667 677 secondary_cache_changed_cb, os));
668 678 }
669 679
670 680 if (os->os_sa)
671 681 sa_tear_down(os);
672 682
683 + os->os_evicting = B_TRUE;
673 684 dmu_objset_evict_dbufs(os);
674 685
686 + mutex_enter(&os->os_lock);
687 + spa_evicting_os_register(os->os_spa, os);
688 + if (list_is_empty(&os->os_dnodes)) {
689 + mutex_exit(&os->os_lock);
690 + dmu_objset_evict_done(os);
691 + } else {
692 + mutex_exit(&os->os_lock);
693 + }
694 +}
695 +
696 +void
697 +dmu_objset_evict_done(objset_t *os)
698 +{
699 + ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
700 +
675 701 dnode_special_close(&os->os_meta_dnode);
676 702 if (DMU_USERUSED_DNODE(os)) {
677 703 dnode_special_close(&os->os_userused_dnode);
678 704 dnode_special_close(&os->os_groupused_dnode);
679 705 }
680 706 zil_free(os->os_zil);
681 707
682 - ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
683 -
684 708 VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
685 709
686 710 /*
687 711 * This is a barrier to prevent the objset from going away in
688 712 * dnode_move() until we can safely ensure that the objset is still in
689 713 * use. We consider the objset valid before the barrier and invalid
690 714 * after the barrier.
691 715 */
692 716 rw_enter(&os_lock, RW_READER);
693 717 rw_exit(&os_lock);
694 718
695 719 mutex_destroy(&os->os_lock);
696 720 mutex_destroy(&os->os_obj_lock);
697 721 mutex_destroy(&os->os_user_ptr_lock);
722 + spa_evicting_os_deregister(os->os_spa, os);
698 723 kmem_free(os, sizeof (objset_t));
699 724 }
700 725
701 726 timestruc_t
702 727 dmu_objset_snap_cmtime(objset_t *os)
703 728 {
704 729 return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
705 730 }
706 731
707 732 /* called from dsl for meta-objset */
708 733 objset_t *
709 734 dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
710 735 dmu_objset_type_t type, dmu_tx_t *tx)
711 736 {
712 737 objset_t *os;
713 738 dnode_t *mdn;
714 739
715 740 ASSERT(dmu_tx_is_syncing(tx));
716 741
717 742 if (ds != NULL)
718 743 VERIFY0(dmu_objset_from_ds(ds, &os));
719 744 else
720 745 VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));
721 746
722 747 mdn = DMU_META_DNODE(os);
723 748
724 749 dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
725 750 DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
726 751
727 752 /*
728 753 * We don't want to have to increase the meta-dnode's nlevels
 729 754  * later, because then we could do it in quiescing context while
730 755 * we are also accessing it in open context.
731 756 *
732 757 * This precaution is not necessary for the MOS (ds == NULL),
733 758 * because the MOS is only updated in syncing context.
734 759 * This is most fortunate: the MOS is the only objset that
735 760 * needs to be synced multiple times as spa_sync() iterates
736 761 * to convergence, so minimizing its dn_nlevels matters.
737 762 */
738 763 if (ds != NULL) {
739 764 int levels = 1;
740 765
741 766 /*
742 767 * Determine the number of levels necessary for the meta-dnode
743 768 * to contain DN_MAX_OBJECT dnodes.
744 769 */
745 770 while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
746 771 (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
747 772 DN_MAX_OBJECT * sizeof (dnode_phys_t))
748 773 levels++;
749 774
750 775 mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
751 776 mdn->dn_nlevels = levels;
752 777 }
753 778
754 779 ASSERT(type != DMU_OST_NONE);
755 780 ASSERT(type != DMU_OST_ANY);
756 781 ASSERT(type < DMU_OST_NUMTYPES);
757 782 os->os_phys->os_type = type;
758 783 if (dmu_objset_userused_enabled(os)) {
759 784 os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
760 785 os->os_flags = os->os_phys->os_flags;
761 786 }
762 787
763 788 dsl_dataset_dirty(ds, tx);
764 789
765 790 return (os);
766 791 }
767 792
768 793 typedef struct dmu_objset_create_arg {
769 794 const char *doca_name;
770 795 cred_t *doca_cred;
771 796 void (*doca_userfunc)(objset_t *os, void *arg,
772 797 cred_t *cr, dmu_tx_t *tx);
773 798 void *doca_userarg;
774 799 dmu_objset_type_t doca_type;
775 800 uint64_t doca_flags;
776 801 } dmu_objset_create_arg_t;
777 802
778 803 /*ARGSUSED*/
779 804 static int
780 805 dmu_objset_create_check(void *arg, dmu_tx_t *tx)
781 806 {
782 807 dmu_objset_create_arg_t *doca = arg;
783 808 dsl_pool_t *dp = dmu_tx_pool(tx);
784 809 dsl_dir_t *pdd;
785 810 const char *tail;
786 811 int error;
787 812
788 813 if (strchr(doca->doca_name, '@') != NULL)
789 814 return (SET_ERROR(EINVAL));
790 815
791 816 error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
792 817 if (error != 0)
793 818 return (error);
794 819 if (tail == NULL) {
795 820 dsl_dir_rele(pdd, FTAG);
796 821 return (SET_ERROR(EEXIST));
797 822 }
798 823 error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
799 824 doca->doca_cred);
800 825 dsl_dir_rele(pdd, FTAG);
801 826
802 827 return (error);
803 828 }
804 829
805 830 static void
806 831 dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
807 832 {
808 833 dmu_objset_create_arg_t *doca = arg;
809 834 dsl_pool_t *dp = dmu_tx_pool(tx);
810 835 dsl_dir_t *pdd;
811 836 const char *tail;
812 837 dsl_dataset_t *ds;
813 838 uint64_t obj;
814 839 blkptr_t *bp;
815 840 objset_t *os;
816 841
817 842 VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
818 843
819 844 obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
820 845 doca->doca_cred, tx);
821 846
822 847 VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
823 848 bp = dsl_dataset_get_blkptr(ds);
824 849 os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
825 850 ds, bp, doca->doca_type, tx);
826 851
827 852 if (doca->doca_userfunc != NULL) {
828 853 doca->doca_userfunc(os, doca->doca_userarg,
829 854 doca->doca_cred, tx);
830 855 }
831 856
832 857 spa_history_log_internal_ds(ds, "create", tx, "");
833 858 dsl_dataset_rele(ds, FTAG);
834 859 dsl_dir_rele(pdd, FTAG);
835 860 }
836 861
837 862 int
838 863 dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
839 864 void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
840 865 {
841 866 dmu_objset_create_arg_t doca;
842 867
843 868 doca.doca_name = name;
844 869 doca.doca_cred = CRED();
845 870 doca.doca_flags = flags;
846 871 doca.doca_userfunc = func;
847 872 doca.doca_userarg = arg;
848 873 doca.doca_type = type;
849 874
850 875 return (dsl_sync_task(name,
851 876 dmu_objset_create_check, dmu_objset_create_sync, &doca,
852 877 5, ZFS_SPACE_CHECK_NORMAL));
853 878 }
854 879
855 880 typedef struct dmu_objset_clone_arg {
856 881 const char *doca_clone;
857 882 const char *doca_origin;
858 883 cred_t *doca_cred;
859 884 } dmu_objset_clone_arg_t;
860 885
861 886 /*ARGSUSED*/
862 887 static int
863 888 dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
864 889 {
865 890 dmu_objset_clone_arg_t *doca = arg;
866 891 dsl_dir_t *pdd;
867 892 const char *tail;
868 893 int error;
869 894 dsl_dataset_t *origin;
870 895 dsl_pool_t *dp = dmu_tx_pool(tx);
871 896
872 897 if (strchr(doca->doca_clone, '@') != NULL)
873 898 return (SET_ERROR(EINVAL));
874 899
875 900 error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
876 901 if (error != 0)
877 902 return (error);
878 903 if (tail == NULL) {
879 904 dsl_dir_rele(pdd, FTAG);
880 905 return (SET_ERROR(EEXIST));
881 906 }
882 907 /* You can't clone across pools. */
883 908 if (pdd->dd_pool != dp) {
884 909 dsl_dir_rele(pdd, FTAG);
885 910 return (SET_ERROR(EXDEV));
886 911 }
887 912 error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
888 913 doca->doca_cred);
889 914 if (error != 0) {
890 915 dsl_dir_rele(pdd, FTAG);
891 916 return (SET_ERROR(EDQUOT));
892 917 }
893 918 dsl_dir_rele(pdd, FTAG);
894 919
895 920 error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
|
↓ open down ↓ |
188 lines elided |
↑ open up ↑ |
896 921 if (error != 0)
897 922 return (error);
898 923
899 924 /* You can't clone across pools. */
900 925 if (origin->ds_dir->dd_pool != dp) {
901 926 dsl_dataset_rele(origin, FTAG);
902 927 return (SET_ERROR(EXDEV));
903 928 }
904 929
905 930 /* You can only clone snapshots, not the head datasets. */
906 - if (!dsl_dataset_is_snapshot(origin)) {
931 + if (!origin->ds_is_snapshot) {
907 932 dsl_dataset_rele(origin, FTAG);
908 933 return (SET_ERROR(EINVAL));
909 934 }
910 935 dsl_dataset_rele(origin, FTAG);
911 936
912 937 return (0);
913 938 }
914 939
915 940 static void
916 941 dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
917 942 {
918 943 dmu_objset_clone_arg_t *doca = arg;
919 944 dsl_pool_t *dp = dmu_tx_pool(tx);
920 945 dsl_dir_t *pdd;
921 946 const char *tail;
922 947 dsl_dataset_t *origin, *ds;
923 948 uint64_t obj;
924 949 char namebuf[MAXNAMELEN];
925 950
926 951 VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
927 952 VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
928 953
929 954 obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
930 955 doca->doca_cred, tx);
931 956
932 957 VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
933 958 dsl_dataset_name(origin, namebuf);
934 959 spa_history_log_internal_ds(ds, "clone", tx,
935 960 "origin=%s (%llu)", namebuf, origin->ds_object);
936 961 dsl_dataset_rele(ds, FTAG);
937 962 dsl_dataset_rele(origin, FTAG);
938 963 dsl_dir_rele(pdd, FTAG);
939 964 }
940 965
941 966 int
942 967 dmu_objset_clone(const char *clone, const char *origin)
943 968 {
944 969 dmu_objset_clone_arg_t doca;
945 970
946 971 doca.doca_clone = clone;
947 972 doca.doca_origin = origin;
948 973 doca.doca_cred = CRED();
949 974
950 975 return (dsl_sync_task(clone,
951 976 dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
952 977 5, ZFS_SPACE_CHECK_NORMAL));
953 978 }
954 979
955 980 int
956 981 dmu_objset_snapshot_one(const char *fsname, const char *snapname)
957 982 {
958 983 int err;
959 984 char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
960 985 nvlist_t *snaps = fnvlist_alloc();
961 986
962 987 fnvlist_add_boolean(snaps, longsnap);
963 988 strfree(longsnap);
964 989 err = dsl_dataset_snapshot(snaps, NULL, NULL);
965 990 fnvlist_free(snaps);
966 991 return (err);
967 992 }
968 993
969 994 static void
970 995 dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
971 996 {
972 997 dnode_t *dn;
973 998
974 999 while (dn = list_head(list)) {
975 1000 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
976 1001 ASSERT(dn->dn_dbuf->db_data_pending);
977 1002 /*
978 1003 * Initialize dn_zio outside dnode_sync() because the
979 1004 * meta-dnode needs to set it ouside dnode_sync().
980 1005 */
981 1006 dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
982 1007 ASSERT(dn->dn_zio);
983 1008
984 1009 ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
985 1010 list_remove(list, dn);
986 1011
987 1012 if (newlist) {
988 1013 (void) dnode_add_ref(dn, newlist);
989 1014 list_insert_tail(newlist, dn);
990 1015 }
991 1016
992 1017 dnode_sync(dn, tx);
993 1018 }
994 1019 }
995 1020
996 1021 /* ARGSUSED */
997 1022 static void
998 1023 dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
999 1024 {
1000 1025 blkptr_t *bp = zio->io_bp;
1001 1026 objset_t *os = arg;
1002 1027 dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
1003 1028
1004 1029 ASSERT(!BP_IS_EMBEDDED(bp));
1005 1030 ASSERT3P(bp, ==, os->os_rootbp);
1006 1031 ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
1007 1032 ASSERT0(BP_GET_LEVEL(bp));
1008 1033
1009 1034 /*
1010 1035 * Update rootbp fill count: it should be the number of objects
1011 1036 * allocated in the object set (not counting the "special"
1012 1037 * objects that are stored in the objset_phys_t -- the meta
1013 1038 * dnode and user/group accounting objects).
1014 1039 */
1015 1040 bp->blk_fill = 0;
1016 1041 for (int i = 0; i < dnp->dn_nblkptr; i++)
1017 1042 bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
1018 1043 }
1019 1044
1020 1045 /* ARGSUSED */
1021 1046 static void
1022 1047 dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
1023 1048 {
1024 1049 blkptr_t *bp = zio->io_bp;
1025 1050 blkptr_t *bp_orig = &zio->io_bp_orig;
1026 1051 objset_t *os = arg;
1027 1052
1028 1053 if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
1029 1054 ASSERT(BP_EQUAL(bp, bp_orig));
1030 1055 } else {
1031 1056 dsl_dataset_t *ds = os->os_dsl_dataset;
1032 1057 dmu_tx_t *tx = os->os_synctx;
1033 1058
1034 1059 (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
1035 1060 dsl_dataset_block_born(ds, bp, tx);
1036 1061 }
1037 1062 }
1038 1063
/*
 * Called from the DSL in syncing context: write out all dirty state of
 * this objset for the given txg.  Issues the root-block arc_write() and
 * syncs the special dnodes and all dirty/free dnode lists as children of
 * "pio"; the I/Os are issued with zio_nowait() and not waited for here.
 */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_phys_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
	arc_release(os->os_phys_buf, &os->os_phys_buf);

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
	    DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
	    NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
	    ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	DMU_META_DNODE(os)->dn_zio = zio;
	dnode_sync(DMU_META_DNODE(os), tx);

	os->os_phys->os_flags = os->os_flags;

	if (DMU_USERUSED_DNODE(os) &&
	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
		DMU_USERUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_USERUSED_DNODE(os), tx);
		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	/* Freed dnodes are synced first so their space is reclaimed. */
	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	/* Issue the dirty-record I/Os queued on the meta-dnode. */
	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
	while (dr = list_head(list)) {
		ASSERT0(dr->dr_dbuf->db_level);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
1127 1152
1128 1153 boolean_t
1129 1154 dmu_objset_is_dirty(objset_t *os, uint64_t txg)
1130 1155 {
1131 1156 return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
1132 1157 !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
1133 1158 }
1134 1159
/* Per-objset-type callbacks used to derive uid/gid from bonus/spill data. */
static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];

/*
 * Register the "used" callback for an objset type so that user/group
 * space accounting can interpret that type's dnode bonus data.
 */
void
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
{
	used_cbs[ost] = cb;
}
1142 1167
1143 1168 boolean_t
1144 1169 dmu_objset_userused_enabled(objset_t *os)
1145 1170 {
1146 1171 return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
1147 1172 used_cbs[os->os_phys->os_type] != NULL &&
1148 1173 DMU_USERUSED_DNODE(os) != NULL);
1149 1174 }
1150 1175
1151 1176 static void
1152 1177 do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
1153 1178 uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
1154 1179 {
1155 1180 if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
1156 1181 int64_t delta = DNODE_SIZE + used;
1157 1182 if (subtract)
1158 1183 delta = -delta;
1159 1184 VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
1160 1185 user, delta, tx));
1161 1186 VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
1162 1187 group, delta, tx));
1163 1188 }
1164 1189 }
1165 1190
/*
 * Called in syncing context after dmu_objset_sync() to apply per-dnode
 * user/group space deltas.  Consumes os_synced_dnodes: for each dnode
 * the old ids are debited and the new ids credited, the old/new id
 * state is rolled forward under dn_mtx, and the hold taken by
 * dmu_objset_sync_dnodes() is released.
 */
void
dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
{
	dnode_t *dn;
	list_t *list = &os->os_synced_dnodes;

	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));

	while (dn = list_head(list)) {
		int flags;
		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
		    dn->dn_phys->dn_flags &
		    DNODE_FLAG_USERUSED_ACCOUNTED);

		/* Allocate the user/groupused objects if necessary. */
		if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
			VERIFY(0 == zap_create_claim(os,
			    DMU_USERUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
			VERIFY(0 == zap_create_claim(os,
			    DMU_GROUPUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
		}

		/*
		 * We intentionally modify the zap object even if the
		 * net delta is zero.  Otherwise
		 * the block of the zap obj could be shared between
		 * datasets but need to be different between them after
		 * a bprewrite.
		 */

		flags = dn->dn_id_flags;
		ASSERT(flags);
		if (flags & DN_ID_OLD_EXIST) {
			/* Back out the charge against the previous ids. */
			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
		}
		if (flags & DN_ID_NEW_EXIST) {
			/* Charge the current usage to the new ids. */
			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
			    dn->dn_phys->dn_flags, dn->dn_newuid,
			    dn->dn_newgid, B_FALSE, tx);
		}

		/* Roll the "new" ids over to become the "old" ids. */
		mutex_enter(&dn->dn_mtx);
		dn->dn_oldused = 0;
		dn->dn_oldflags = 0;
		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
			dn->dn_olduid = dn->dn_newuid;
			dn->dn_oldgid = dn->dn_newgid;
			dn->dn_id_flags |= DN_ID_OLD_EXIST;
			if (dn->dn_bonuslen == 0)
				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
			else
				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		}
		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
		mutex_exit(&dn->dn_mtx);

		/* Drop the hold added in dmu_objset_sync_dnodes(). */
		list_remove(list, dn);
		dnode_rele(dn, list);
	}
}
1230 1255
1231 1256 /*
1232 1257 * Returns a pointer to data to find uid/gid from
1233 1258 *
1234 1259 * If a dirty record for transaction group that is syncing can't
1235 1260 * be found then NULL is returned. In the NULL case it is assumed
1236 1261 * the uid/gid aren't changing.
1237 1262 */
1238 1263 static void *
1239 1264 dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
1240 1265 {
1241 1266 dbuf_dirty_record_t *dr, **drp;
1242 1267 void *data;
1243 1268
1244 1269 if (db->db_dirtycnt == 0)
1245 1270 return (db->db.db_data); /* Nothing is changing */
1246 1271
1247 1272 for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
1248 1273 if (dr->dr_txg == tx->tx_txg)
1249 1274 break;
1250 1275
1251 1276 if (dr == NULL) {
1252 1277 data = NULL;
1253 1278 } else {
1254 1279 dnode_t *dn;
1255 1280
1256 1281 DB_DNODE_ENTER(dr->dr_dbuf);
1257 1282 dn = DB_DNODE(dr->dr_dbuf);
1258 1283
1259 1284 if (dn->dn_bonuslen == 0 &&
1260 1285 dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
1261 1286 data = dr->dt.dl.dr_data->b_data;
1262 1287 else
1263 1288 data = dr->dt.dl.dr_data;
1264 1289
1265 1290 DB_DNODE_EXIT(dr->dr_dbuf);
1266 1291 }
1267 1292
1268 1293 return (data);
1269 1294 }
1270 1295
/*
 * Capture the uid/gid of "dn" so its space can be charged to the right
 * user/group accounting entries.  If "before" is set, record the ids as
 * they were prior to this change (dn_olduid/dn_oldgid); otherwise record
 * the pending new ids (dn_newuid/dn_newgid).  The ids are extracted from
 * the bonus buffer or spill block via the registered type callback.
 */
void
dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
{
	objset_t *os = dn->dn_objset;
	void *data = NULL;
	dmu_buf_impl_t *db = NULL;
	uint64_t *user = NULL;
	uint64_t *group = NULL;
	int flags = dn->dn_id_flags;
	int error;
	boolean_t have_spill = B_FALSE;

	if (!dmu_objset_userused_enabled(dn->dn_objset))
		return;

	/* Nothing to do if the old ids have already been captured. */
	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
	    DN_ID_CHKED_SPILL)))
		return;

	if (before && dn->dn_bonuslen != 0)
		data = DN_BONUS(dn->dn_phys);
	else if (!before && dn->dn_bonuslen != 0) {
		if (dn->dn_bonus) {
			db = dn->dn_bonus;
			mutex_enter(&db->db_mtx);
			data = dmu_objset_userquota_find_data(db, tx);
		} else {
			data = DN_BONUS(dn->dn_phys);
		}
	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
		/* SA-only dnode: the ids live in the spill block. */
		int rf = 0;

		if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
			rf |= DB_RF_HAVESTRUCT;
		error = dmu_spill_hold_by_dnode(dn,
		    rf | DB_RF_MUST_SUCCEED,
		    FTAG, (dmu_buf_t **)&db);
		ASSERT(error == 0);
		mutex_enter(&db->db_mtx);
		data = (before) ? db->db.db_data :
		    dmu_objset_userquota_find_data(db, tx);
		have_spill = B_TRUE;
	} else {
		/* No bonus or spill data; just mark the dnode checked. */
		mutex_enter(&dn->dn_mtx);
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		mutex_exit(&dn->dn_mtx);
		return;
	}

	if (before) {
		ASSERT(data);
		user = &dn->dn_olduid;
		group = &dn->dn_oldgid;
	} else if (data) {
		user = &dn->dn_newuid;
		group = &dn->dn_newgid;
	}

	/*
	 * Must always call the callback in case the object
	 * type has changed and that type isn't an object type to track
	 */
	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
	    user, group);

	/*
	 * Preserve existing uid/gid when the callback can't determine
	 * what the new uid/gid are and the callback returned EEXIST.
	 * The EEXIST error tells us to just use the existing uid/gid.
	 * If we don't know what the old values are then just assign
	 * them to 0, since that is a new file being created.
	 */
	if (!before && data == NULL && error == EEXIST) {
		if (flags & DN_ID_OLD_EXIST) {
			dn->dn_newuid = dn->dn_olduid;
			dn->dn_newgid = dn->dn_oldgid;
		} else {
			dn->dn_newuid = 0;
			dn->dn_newgid = 0;
		}
		error = 0;
	}

	if (db)
		mutex_exit(&db->db_mtx);

	/* Publish which id state now exists, under dn_mtx. */
	mutex_enter(&dn->dn_mtx);
	if (error == 0 && before)
		dn->dn_id_flags |= DN_ID_OLD_EXIST;
	if (error == 0 && !before)
		dn->dn_id_flags |= DN_ID_NEW_EXIST;

	if (have_spill) {
		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
	} else {
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
	}
	mutex_exit(&dn->dn_mtx);
	if (have_spill)
		dmu_buf_rele((dmu_buf_t *)db, FTAG);
}
1372 1397
1373 1398 boolean_t
1374 1399 dmu_objset_userspace_present(objset_t *os)
1375 1400 {
1376 1401 return (os->os_phys->os_flags &
1377 1402 OBJSET_FLAG_USERACCOUNTING_COMPLETE);
1378 1403 }
1379 1404
/*
 * Bring an older objset up to date with user/group space accounting by
 * dirtying every object so it is rewritten with accounting applied.
 * Returns 0 on success (or if accounting is already present), ENOTSUP
 * if accounting is unsupported, EINVAL for snapshots, and EINTR if
 * interrupted by a signal.
 */
int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os))
		return (SET_ERROR(ENOTSUP));
	if (dmu_objset_is_snapshot(os))
		return (SET_ERROR(EINVAL));

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		/* Allow a pending signal to abort the upgrade. */
		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (SET_ERROR(EINTR));

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr != 0)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr != 0) {
			/* Best effort: skip objects we can't assign. */
			dmu_tx_abort(tx);
			continue;
		}
		/* Dirtying the bonus buffer forces a rewrite of the object. */
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}
1428 1453
1429 1454 void
1430 1455 dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
1431 1456 uint64_t *usedobjsp, uint64_t *availobjsp)
1432 1457 {
1433 1458 dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
1434 1459 usedobjsp, availobjsp);
1435 1460 }
1436 1461
1437 1462 uint64_t
1438 1463 dmu_objset_fsid_guid(objset_t *os)
1439 1464 {
1440 1465 return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
1441 1466 }
1442 1467
1443 1468 void
1444 1469 dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
1445 1470 {
1446 1471 stat->dds_type = os->os_phys->os_type;
1447 1472 if (os->os_dsl_dataset)
1448 1473 dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
1449 1474 }
1450 1475
1451 1476 void
1452 1477 dmu_objset_stats(objset_t *os, nvlist_t *nv)
1453 1478 {
1454 1479 ASSERT(os->os_dsl_dataset ||
1455 1480 os->os_phys->os_type == DMU_OST_META);
1456 1481
1457 1482 if (os->os_dsl_dataset != NULL)
1458 1483 dsl_dataset_stats(os->os_dsl_dataset, nv);
1459 1484
|
↓ open down ↓ |
543 lines elided |
↑ open up ↑ |
1460 1485 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
1461 1486 os->os_phys->os_type);
1462 1487 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
1463 1488 dmu_objset_userspace_present(os));
1464 1489 }
1465 1490
1466 1491 int
1467 1492 dmu_objset_is_snapshot(objset_t *os)
1468 1493 {
1469 1494 if (os->os_dsl_dataset != NULL)
1470 - return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
1495 + return (os->os_dsl_dataset->ds_is_snapshot);
1471 1496 else
1472 1497 return (B_FALSE);
1473 1498 }
1474 1499
1475 1500 int
1476 1501 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
1477 1502 boolean_t *conflict)
1478 1503 {
1479 1504 dsl_dataset_t *ds = os->os_dsl_dataset;
1480 1505 uint64_t ignored;
1481 1506
1482 1507 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1483 1508 return (SET_ERROR(ENOENT));
1484 1509
1485 1510 return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
1486 1511 dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
1487 1512 MT_FIRST, real, maxlen, conflict));
1488 1513 }
1489 1514
1490 1515 int
1491 1516 dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
1492 1517 uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
1493 1518 {
1494 1519 dsl_dataset_t *ds = os->os_dsl_dataset;
1495 1520 zap_cursor_t cursor;
1496 1521 zap_attribute_t attr;
1497 1522
1498 1523 ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
1499 1524
1500 1525 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1501 1526 return (SET_ERROR(ENOENT));
1502 1527
1503 1528 zap_cursor_init_serialized(&cursor,
1504 1529 ds->ds_dir->dd_pool->dp_meta_objset,
1505 1530 dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp);
1506 1531
1507 1532 if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1508 1533 zap_cursor_fini(&cursor);
1509 1534 return (SET_ERROR(ENOENT));
1510 1535 }
1511 1536
1512 1537 if (strlen(attr.za_name) + 1 > namelen) {
1513 1538 zap_cursor_fini(&cursor);
1514 1539 return (SET_ERROR(ENAMETOOLONG));
1515 1540 }
1516 1541
1517 1542 (void) strcpy(name, attr.za_name);
1518 1543 if (idp)
1519 1544 *idp = attr.za_first_integer;
1520 1545 if (case_conflict)
1521 1546 *case_conflict = attr.za_normalization_conflict;
1522 1547 zap_cursor_advance(&cursor);
1523 1548 *offp = zap_cursor_serialize(&cursor);
1524 1549 zap_cursor_fini(&cursor);
1525 1550
1526 1551 return (0);
1527 1552 }
1528 1553
1529 1554 int
1530 1555 dmu_dir_list_next(objset_t *os, int namelen, char *name,
1531 1556 uint64_t *idp, uint64_t *offp)
1532 1557 {
1533 1558 dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
1534 1559 zap_cursor_t cursor;
1535 1560 zap_attribute_t attr;
1536 1561
1537 1562 /* there is no next dir on a snapshot! */
1538 1563 if (os->os_dsl_dataset->ds_object !=
1539 1564 dsl_dir_phys(dd)->dd_head_dataset_obj)
1540 1565 return (SET_ERROR(ENOENT));
1541 1566
1542 1567 zap_cursor_init_serialized(&cursor,
1543 1568 dd->dd_pool->dp_meta_objset,
1544 1569 dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp);
1545 1570
1546 1571 if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1547 1572 zap_cursor_fini(&cursor);
1548 1573 return (SET_ERROR(ENOENT));
1549 1574 }
1550 1575
1551 1576 if (strlen(attr.za_name) + 1 > namelen) {
1552 1577 zap_cursor_fini(&cursor);
1553 1578 return (SET_ERROR(ENAMETOOLONG));
1554 1579 }
1555 1580
1556 1581 (void) strcpy(name, attr.za_name);
1557 1582 if (idp)
1558 1583 *idp = attr.za_first_integer;
1559 1584 zap_cursor_advance(&cursor);
1560 1585 *offp = zap_cursor_serialize(&cursor);
1561 1586 zap_cursor_fini(&cursor);
1562 1587
1563 1588 return (0);
1564 1589 }
1565 1590
1566 1591 /*
1567 1592 * Find objsets under and including ddobj, call func(ds) on each.
1568 1593 */
1569 1594 int
1570 1595 dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
1571 1596 int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
1572 1597 {
1573 1598 dsl_dir_t *dd;
1574 1599 dsl_dataset_t *ds;
1575 1600 zap_cursor_t zc;
1576 1601 zap_attribute_t *attr;
1577 1602 uint64_t thisobj;
1578 1603 int err;
1579 1604
1580 1605 ASSERT(dsl_pool_config_held(dp));
1581 1606
1582 1607 err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
1583 1608 if (err != 0)
1584 1609 return (err);
1585 1610
1586 1611 /* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1587 1612 if (dd->dd_myname[0] == '$') {
1588 1613 dsl_dir_rele(dd, FTAG);
1589 1614 return (0);
1590 1615 }
1591 1616
1592 1617 thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
1593 1618 attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1594 1619
1595 1620 /*
1596 1621 * Iterate over all children.
1597 1622 */
1598 1623 if (flags & DS_FIND_CHILDREN) {
1599 1624 for (zap_cursor_init(&zc, dp->dp_meta_objset,
1600 1625 dsl_dir_phys(dd)->dd_child_dir_zapobj);
1601 1626 zap_cursor_retrieve(&zc, attr) == 0;
1602 1627 (void) zap_cursor_advance(&zc)) {
1603 1628 ASSERT3U(attr->za_integer_length, ==,
1604 1629 sizeof (uint64_t));
1605 1630 ASSERT3U(attr->za_num_integers, ==, 1);
1606 1631
1607 1632 err = dmu_objset_find_dp(dp, attr->za_first_integer,
1608 1633 func, arg, flags);
1609 1634 if (err != 0)
1610 1635 break;
1611 1636 }
1612 1637 zap_cursor_fini(&zc);
1613 1638
1614 1639 if (err != 0) {
1615 1640 dsl_dir_rele(dd, FTAG);
1616 1641 kmem_free(attr, sizeof (zap_attribute_t));
1617 1642 return (err);
1618 1643 }
1619 1644 }
1620 1645
1621 1646 /*
1622 1647 * Iterate over all snapshots.
1623 1648 */
1624 1649 if (flags & DS_FIND_SNAPSHOTS) {
1625 1650 dsl_dataset_t *ds;
1626 1651 err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1627 1652
1628 1653 if (err == 0) {
1629 1654 uint64_t snapobj;
1630 1655
1631 1656 snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
1632 1657 dsl_dataset_rele(ds, FTAG);
1633 1658
1634 1659 for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1635 1660 zap_cursor_retrieve(&zc, attr) == 0;
1636 1661 (void) zap_cursor_advance(&zc)) {
1637 1662 ASSERT3U(attr->za_integer_length, ==,
1638 1663 sizeof (uint64_t));
1639 1664 ASSERT3U(attr->za_num_integers, ==, 1);
1640 1665
1641 1666 err = dsl_dataset_hold_obj(dp,
1642 1667 attr->za_first_integer, FTAG, &ds);
1643 1668 if (err != 0)
1644 1669 break;
1645 1670 err = func(dp, ds, arg);
1646 1671 dsl_dataset_rele(ds, FTAG);
1647 1672 if (err != 0)
1648 1673 break;
1649 1674 }
1650 1675 zap_cursor_fini(&zc);
1651 1676 }
1652 1677 }
1653 1678
1654 1679 dsl_dir_rele(dd, FTAG);
1655 1680 kmem_free(attr, sizeof (zap_attribute_t));
1656 1681
1657 1682 if (err != 0)
1658 1683 return (err);
1659 1684
1660 1685 /*
1661 1686 * Apply to self.
1662 1687 */
1663 1688 err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1664 1689 if (err != 0)
1665 1690 return (err);
1666 1691 err = func(dp, ds, arg);
1667 1692 dsl_dataset_rele(ds, FTAG);
1668 1693 return (err);
1669 1694 }
1670 1695
/*
 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
 * The dp_config_rwlock must not be held when this is called, and it
 * will not be held when the callback is called.
 * Therefore this function should only be used when the pool is not changing
 * (e.g. in syncing context), or the callback can deal with the possible races.
 */
static int
dmu_objset_find_impl(spa_t *spa, const char *name,
    int func(const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = spa_get_dsl(spa);
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	dsl_pool_config_enter(dp, FTAG);

	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
	if (err != 0) {
		dsl_pool_config_exit(dp, FTAG);
		return (err);
	}

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_rele(dd, FTAG);
		dsl_pool_config_exit(dp, FTAG);
		return (0);
	}

	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT3U(attr->za_integer_length, ==,
			    sizeof (uint64_t));
			ASSERT3U(attr->za_num_integers, ==, 1);

			child = kmem_asprintf("%s/%s", name, attr->za_name);
			/*
			 * Drop the config lock across the recursion so
			 * the callback never sees it held.
			 */
			dsl_pool_config_exit(dp, FTAG);
			err = dmu_objset_find_impl(spa, child,
			    func, arg, flags);
			dsl_pool_config_enter(dp, FTAG);
			strfree(child);
			if (err != 0)
				break;
		}
		zap_cursor_fini(&zc);

		if (err != 0) {
			dsl_dir_rele(dd, FTAG);
			dsl_pool_config_exit(dp, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);

		if (err == 0) {
			uint64_t snapobj;

			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT3U(attr->za_integer_length, ==,
				    sizeof (uint64_t));
				ASSERT3U(attr->za_num_integers, ==, 1);

				child = kmem_asprintf("%s@%s",
				    name, attr->za_name);
				/* Likewise drop the lock for the callback. */
				dsl_pool_config_exit(dp, FTAG);
				err = func(child, arg);
				dsl_pool_config_enter(dp, FTAG);
				strfree(child);
				if (err != 0)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_rele(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));
	dsl_pool_config_exit(dp, FTAG);

	if (err != 0)
		return (err);

	/* Apply to self. */
	return (func(name, arg));
}
1782 1807
1783 1808 /*
1784 1809 * See comment above dmu_objset_find_impl().
1785 1810 */
1786 1811 int
1787 1812 dmu_objset_find(char *name, int func(const char *, void *), void *arg,
1788 1813 int flags)
1789 1814 {
1790 1815 spa_t *spa;
1791 1816 int error;
1792 1817
1793 1818 error = spa_open(name, &spa, FTAG);
1794 1819 if (error != 0)
1795 1820 return (error);
1796 1821 error = dmu_objset_find_impl(spa, name, func, arg, flags);
1797 1822 spa_close(spa, FTAG);
1798 1823 return (error);
1799 1824 }
1800 1825
/*
 * Attach an opaque consumer pointer to this objset.  Caller must hold
 * os_user_ptr_lock.
 */
void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	os->os_user_ptr = user_ptr;
}
1807 1832
/*
 * Return the opaque consumer pointer set by dmu_objset_set_user().
 * Caller must hold os_user_ptr_lock.
 */
void *
dmu_objset_get_user(objset_t *os)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	return (os->os_user_ptr);
}
1814 1839
1815 1840 /*
1816 1841 * Determine name of filesystem, given name of snapshot.
1817 1842 * buf must be at least MAXNAMELEN bytes
1818 1843 */
1819 1844 int
1820 1845 dmu_fsname(const char *snapname, char *buf)
1821 1846 {
1822 1847 char *atp = strchr(snapname, '@');
1823 1848 if (atp == NULL)
1824 1849 return (SET_ERROR(EINVAL));
1825 1850 if (atp - snapname >= MAXNAMELEN)
1826 1851 return (SET_ERROR(ENAMETOOLONG));
1827 1852 (void) strlcpy(buf, snapname, atp - snapname + 1);
1828 1853 return (0);
1829 1854 }
|
↓ open down ↓ |
349 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX