4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 */
25
26 #include <sys/zfs_context.h>
27 #include <sys/dbuf.h>
28 #include <sys/dnode.h>
29 #include <sys/dmu.h>
30 #include <sys/dmu_impl.h>
31 #include <sys/dmu_tx.h>
32 #include <sys/dmu_objset.h>
33 #include <sys/dsl_dir.h>
34 #include <sys/dsl_dataset.h>
35 #include <sys/spa.h>
36 #include <sys/zio.h>
37 #include <sys/dmu_zfetch.h>
38 #include <sys/range_tree.h>
39
40 static kmem_cache_t *dnode_cache;
41 /*
42 * Define DNODE_STATS to turn on statistic gathering. By default, it is only
43 * turned on when DEBUG is also defined.
385 dn->dn_have_spill = B_FALSE;
386 }
387
388 static void
389 dnode_setdblksz(dnode_t *dn, int size)
390 {
391 ASSERT0(P2PHASE(size, SPA_MINBLOCKSIZE));
392 ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
393 ASSERT3U(size, >=, SPA_MINBLOCKSIZE);
394 ASSERT3U(size >> SPA_MINBLOCKSHIFT, <,
395 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
396 dn->dn_datablksz = size;
397 dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
398 dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0;
399 }
400
/*
 * Construct an in-core dnode for object `object` of objset `os`, backed
 * by the on-disk dnode_phys_t `dnp`.  `db` is the dnode-array dbuf that
 * contains `dnp` (NULL for special dnodes) and `dnh` is the handle slot
 * the dnode will occupy.  Frequently used on-disk fields are mirrored
 * into the dnode_t; the dnode only becomes visible to dnode_move() once
 * dn_objset is published at the end.
 */
static dnode_t *
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
    uint64_t object, dnode_handle_t *dnh)
{
	dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);

	/* Cached dnodes come back with dn_objset invalidated. */
	ASSERT(!POINTER_IS_VALID(dn->dn_objset));
	dn->dn_moved = 0;

	/*
	 * Defer setting dn_objset until the dnode is ready to be a candidate
	 * for the dnode_move() callback.
	 */
	dn->dn_object = object;
	dn->dn_dbuf = db;
	dn->dn_handle = dnh;
	dn->dn_phys = dnp;

	if (dnp->dn_datablkszsec) {
		dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
	} else {
		/* Freshly allocated object: no block size chosen yet. */
		dn->dn_datablksz = 0;
		dn->dn_datablkszsec = 0;
		dn->dn_datablkshift = 0;
	}
	/* Mirror the frequently accessed on-disk fields. */
	dn->dn_indblkshift = dnp->dn_indblkshift;
	dn->dn_nlevels = dnp->dn_nlevels;
	dn->dn_type = dnp->dn_type;
	dn->dn_nblkptr = dnp->dn_nblkptr;
	dn->dn_checksum = dnp->dn_checksum;
	dn->dn_compress = dnp->dn_compress;
	dn->dn_bonustype = dnp->dn_bonustype;
	dn->dn_bonuslen = dnp->dn_bonuslen;
	dn->dn_maxblkid = dnp->dn_maxblkid;
	dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
	dn->dn_id_flags = 0;

	dmu_zfetch_init(&dn->dn_zfetch, dn);

	ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));

	mutex_enter(&os->os_lock);
	list_insert_head(&os->os_dnodes, dn);
	/* Order all prior stores before publishing dn_objset below. */
	membar_producer();
	/*
	 * Everything else must be valid before assigning dn_objset makes the
	 * dnode eligible for dnode_move().
	 */
	dn->dn_objset = os;
	mutex_exit(&os->os_lock);

	arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
	return (dn);
}
455
456 /*
457 * Caller must be holding the dnode handle, which is released upon return.
458 */
459 static void
460 dnode_destroy(dnode_t *dn)
461 {
462 objset_t *os = dn->dn_objset;
463
464 ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
465
466 mutex_enter(&os->os_lock);
467 POINTER_INVALIDATE(&dn->dn_objset);
468 list_remove(&os->os_dnodes, dn);
469 mutex_exit(&os->os_lock);
470
471 /* the dnode can no longer move, so we can release the handle */
472 zrl_remove(&dn->dn_handle->dnh_zrlock);
473
474 dn->dn_allocated_txg = 0;
475 dn->dn_free_txg = 0;
476 dn->dn_assigned_txg = 0;
477
478 dn->dn_dirtyctx = 0;
479 if (dn->dn_dirtyctx_firstset != NULL) {
480 kmem_free(dn->dn_dirtyctx_firstset, 1);
481 dn->dn_dirtyctx_firstset = NULL;
482 }
483 if (dn->dn_bonus != NULL) {
484 mutex_enter(&dn->dn_bonus->db_mtx);
485 dbuf_evict(dn->dn_bonus);
486 dn->dn_bonus = NULL;
487 }
488 dn->dn_zio = NULL;
489
490 dn->dn_have_spill = B_FALSE;
491 dn->dn_oldused = 0;
492 dn->dn_oldflags = 0;
493 dn->dn_olduid = 0;
494 dn->dn_oldgid = 0;
495 dn->dn_newuid = 0;
496 dn->dn_newgid = 0;
497 dn->dn_id_flags = 0;
498 dn->dn_unlisted_l0_blkid = 0;
499
500 dmu_zfetch_rele(&dn->dn_zfetch);
501 kmem_cache_free(dnode_cache, dn);
502 arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
503 }
504
505 void
506 dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
507 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
508 {
509 int i;
510
511 ASSERT3U(blocksize, <=,
512 spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
513 if (blocksize == 0)
514 blocksize = 1 << zfs_default_bs;
515 else
516 blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE);
517
518 if (ibs == 0)
519 ibs = zfs_default_ibs;
520
521 ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
522
949 zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */
950 mutex_exit(&os->os_lock);
951
952 return (KMEM_CBRC_YES);
953 }
954 #endif /* _KERNEL */
955
/*
 * Close a dnode opened with dnode_special_open(), destroying the dnode
 * and clearing the handle.  Blocks until all outstanding holds drain.
 */
void
dnode_special_close(dnode_handle_t *dnh)
{
	dnode_t *dn = dnh->dnh_dnode;

	/*
	 * Wait for final references to the dnode to clear. This can
	 * only happen if the arc is asynchronously evicting state that
	 * has a hold on this dnode while we are trying to evict this
	 * dnode.
	 */
	while (refcount_count(&dn->dn_holds) > 0)
		delay(1);	/* busy-wait until the last hold is dropped */
	/* Acquire the handle; dnode_destroy() releases it implicitly. */
	zrl_add(&dnh->dnh_zrlock);
	dnode_destroy(dn); /* implicit zrl_remove() */
	zrl_destroy(&dnh->dnh_zrlock);
	dnh->dnh_dnode = NULL;
}
974
975 dnode_t *
976 dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
977 dnode_handle_t *dnh)
978 {
979 dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
980 dnh->dnh_dnode = dn;
981 zrl_init(&dnh->dnh_zrlock);
982 DNODE_VERIFY(dn);
983 return (dn);
984 }
985
/*
 * dbuf user-eviction callback for a dnode-array dbuf: destroy every
 * child dnode still attached to `arg` (a dnode_children_t) and free
 * the children array itself.
 */
static void
dnode_buf_pageout(dmu_buf_t *db, void *arg)
{
	dnode_children_t *children_dnodes = arg;
	int i;
	int epb = db->db_size >> DNODE_SHIFT;	/* dnodes per block */

	ASSERT(epb == children_dnodes->dnc_count);

	for (i = 0; i < epb; i++) {
		dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
		dnode_t *dn;

		/*
		 * The dnode handle lock guards against the dnode moving to
		 * another valid address, so there is no need here to guard
		 * against changes to or from NULL.
		 */
		if (dnh->dnh_dnode == NULL) {
			zrl_destroy(&dnh->dnh_zrlock);
			continue;
		}

		zrl_add(&dnh->dnh_zrlock);
		dn = dnh->dnh_dnode;
		/*
		 * If there are holds on this dnode, then there should
		 * be holds on the dnode's containing dbuf as well; thus
		 * it wouldn't be eligible for eviction and this function
		 * would not have been called.
		 */
		ASSERT(refcount_is_zero(&dn->dn_holds));
		ASSERT(refcount_is_zero(&dn->dn_tx_holds));

		dnode_destroy(dn); /* implicit zrl_remove() */
		zrl_destroy(&dnh->dnh_zrlock);
		dnh->dnh_dnode = NULL;
	}
	kmem_free(children_dnodes, sizeof (dnode_children_t) +
	    epb * sizeof (dnode_handle_t));
}
1027
1028 /*
1029 * errors:
1030 * EINVAL - invalid object number.
1031 * EIO - i/o error.
1032 * succeeds even for free dnodes.
1033 */
int
dnode_hold_impl(objset_t *os, uint64_t object, int flag,
    void *tag, dnode_t **dnp)
{
	int epb, idx, err;
	int drop_struct_lock = FALSE;
	int type;
	uint64_t blk;
	dnode_t *mdn, *dn;
	dmu_buf_impl_t *db;
	dnode_children_t *children_dnodes;
	dnode_handle_t *dnh;
	if (drop_struct_lock)
		rw_exit(&mdn->dn_struct_rwlock);
	if (db == NULL)
		return (SET_ERROR(EIO));
	/* Read in the block of dnodes containing the requested object. */
	err = dbuf_read(db, NULL, DB_RF_CANFAIL);
	if (err) {
		dbuf_rele(db, FTAG);
		return (err);
	}

	ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
	epb = db->db.db_size >> DNODE_SHIFT;	/* dnodes per block */

	idx = object & (epb-1);	/* object's index within its block */

	ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE);
	children_dnodes = dmu_buf_get_user(&db->db);
	if (children_dnodes == NULL) {
		/*
		 * First hold on this block of dnodes: allocate the child
		 * handle array and try to install it as the dbuf user.
		 */
		int i;
		dnode_children_t *winner;
		children_dnodes = kmem_alloc(sizeof (dnode_children_t) +
		    epb * sizeof (dnode_handle_t), KM_SLEEP);
		children_dnodes->dnc_count = epb;
		dnh = &children_dnodes->dnc_children[0];
		for (i = 0; i < epb; i++) {
			zrl_init(&dnh[i].dnh_zrlock);
			dnh[i].dnh_dnode = NULL;
		}
		/* Assignment intentional: non-NULL winner means we raced. */
		if (winner = dmu_buf_set_user(&db->db, children_dnodes,
		    dnode_buf_pageout)) {

			/* Lost the race: discard our array, use the winner's. */
			for (i = 0; i < epb; i++) {
				zrl_destroy(&dnh[i].dnh_zrlock);
			}

			kmem_free(children_dnodes, sizeof (dnode_children_t) +
			    epb * sizeof (dnode_handle_t));
			children_dnodes = winner;
		}
	}
	ASSERT(children_dnodes->dnc_count == epb);

	dnh = &children_dnodes->dnc_children[idx];
	zrl_add(&dnh->dnh_zrlock);
	if ((dn = dnh->dnh_dnode) == NULL) {
		dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
		dnode_t *winner;

		dn = dnode_create(os, phys, db, object, dnh);
		/* Publish the dnode in its handle slot, racing other holders. */
		winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
		if (winner != NULL) {
			zrl_add(&dnh->dnh_zrlock);
			dnode_destroy(dn); /* implicit zrl_remove() */
			dn = winner;
		}
	}

	mutex_enter(&dn->dn_mtx);
	type = dn->dn_type;
	if (dn->dn_free_txg ||
	    ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
	    ((flag & DNODE_MUST_BE_FREE) &&
	    (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
		mutex_exit(&dn->dn_mtx);
		zrl_remove(&dnh->dnh_zrlock);
		dbuf_rele(db, FTAG);
		return (type == DMU_OT_NONE ? ENOENT : EEXIST);
	}
	mutex_exit(&dn->dn_mtx);

	/*
	 * NOTE(review): the hold is added after dropping dn_mtx; the handle
	 * zrlock is still held here, which should pin the dnode until the
	 * hold is in place — verify against dnode_move().
	 */
	if (refcount_add(&dn->dn_holds, tag) == 1)
		dbuf_add_ref(db, dnh);
	/* Now we can rely on the hold to prevent the dnode from moving. */
	zrl_remove(&dnh->dnh_zrlock);

	DNODE_VERIFY(dn);
	ASSERT3P(dn->dn_dbuf, ==, db);
	ASSERT3U(dn->dn_object, ==, object);
	dbuf_rele(db, FTAG);

	*dnp = dn;
	return (0);
}
1172
1173 /*
1174 * Return held dnode if the object is allocated, NULL if not.
1175 */
1176 int
1177 dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
1178 {
1179 return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
1180 }
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
25 */
26
27 #include <sys/zfs_context.h>
28 #include <sys/dbuf.h>
29 #include <sys/dnode.h>
30 #include <sys/dmu.h>
31 #include <sys/dmu_impl.h>
32 #include <sys/dmu_tx.h>
33 #include <sys/dmu_objset.h>
34 #include <sys/dsl_dir.h>
35 #include <sys/dsl_dataset.h>
36 #include <sys/spa.h>
37 #include <sys/zio.h>
38 #include <sys/dmu_zfetch.h>
39 #include <sys/range_tree.h>
40
41 static kmem_cache_t *dnode_cache;
42 /*
43 * Define DNODE_STATS to turn on statistic gathering. By default, it is only
44 * turned on when DEBUG is also defined.
386 dn->dn_have_spill = B_FALSE;
387 }
388
389 static void
390 dnode_setdblksz(dnode_t *dn, int size)
391 {
392 ASSERT0(P2PHASE(size, SPA_MINBLOCKSIZE));
393 ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
394 ASSERT3U(size, >=, SPA_MINBLOCKSIZE);
395 ASSERT3U(size >> SPA_MINBLOCKSHIFT, <,
396 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
397 dn->dn_datablksz = size;
398 dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
399 dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0;
400 }
401
402 static dnode_t *
403 dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
404 uint64_t object, dnode_handle_t *dnh)
405 {
406 dnode_t *dn;
407
408 dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
409 ASSERT(!POINTER_IS_VALID(dn->dn_objset));
410 dn->dn_moved = 0;
411
412 /*
413 * Defer setting dn_objset until the dnode is ready to be a candidate
414 * for the dnode_move() callback.
415 */
416 dn->dn_object = object;
417 dn->dn_dbuf = db;
418 dn->dn_handle = dnh;
419 dn->dn_phys = dnp;
420
421 if (dnp->dn_datablkszsec) {
422 dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
423 } else {
424 dn->dn_datablksz = 0;
425 dn->dn_datablkszsec = 0;
426 dn->dn_datablkshift = 0;
427 }
428 dn->dn_indblkshift = dnp->dn_indblkshift;
429 dn->dn_nlevels = dnp->dn_nlevels;
430 dn->dn_type = dnp->dn_type;
431 dn->dn_nblkptr = dnp->dn_nblkptr;
432 dn->dn_checksum = dnp->dn_checksum;
433 dn->dn_compress = dnp->dn_compress;
434 dn->dn_bonustype = dnp->dn_bonustype;
435 dn->dn_bonuslen = dnp->dn_bonuslen;
436 dn->dn_maxblkid = dnp->dn_maxblkid;
437 dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
438 dn->dn_id_flags = 0;
439
440 dmu_zfetch_init(&dn->dn_zfetch, dn);
441
442 ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
443
444 mutex_enter(&os->os_lock);
445 if (dnh->dnh_dnode != NULL) {
446 /* Lost the allocation race. */
447 mutex_exit(&os->os_lock);
448 kmem_cache_free(dnode_cache, dn);
449 return (dnh->dnh_dnode);
450 }
451
452 /*
453 * Exclude special dnodes from os_dnodes so an empty os_dnodes
454 * signifies that the special dnodes have no references from
455 * their children (the entries in os_dnodes). This allows
456 * dnode_destroy() to easily determine if the last child has
457 * been removed and then complete eviction of the objset.
458 */
459 if (!DMU_OBJECT_IS_SPECIAL(object))
460 list_insert_head(&os->os_dnodes, dn);
461 membar_producer();
462
463 /*
464 * Everything else must be valid before assigning dn_objset
465 * makes the dnode eligible for dnode_move().
466 */
467 dn->dn_objset = os;
468
469 dnh->dnh_dnode = dn;
470 mutex_exit(&os->os_lock);
471
472 arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
473 return (dn);
474 }
475
476 /*
477 * Caller must be holding the dnode handle, which is released upon return.
478 */
479 static void
480 dnode_destroy(dnode_t *dn)
481 {
482 objset_t *os = dn->dn_objset;
483 boolean_t complete_os_eviction = B_FALSE;
484
485 ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
486
487 mutex_enter(&os->os_lock);
488 POINTER_INVALIDATE(&dn->dn_objset);
489 if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
490 list_remove(&os->os_dnodes, dn);
491 complete_os_eviction =
492 list_is_empty(&os->os_dnodes) &&
493 list_link_active(&os->os_evicting_node);
494 }
495 mutex_exit(&os->os_lock);
496
497 /* the dnode can no longer move, so we can release the handle */
498 zrl_remove(&dn->dn_handle->dnh_zrlock);
499
500 dn->dn_allocated_txg = 0;
501 dn->dn_free_txg = 0;
502 dn->dn_assigned_txg = 0;
503
504 dn->dn_dirtyctx = 0;
505 if (dn->dn_dirtyctx_firstset != NULL) {
506 kmem_free(dn->dn_dirtyctx_firstset, 1);
507 dn->dn_dirtyctx_firstset = NULL;
508 }
509 if (dn->dn_bonus != NULL) {
510 mutex_enter(&dn->dn_bonus->db_mtx);
511 dbuf_evict(dn->dn_bonus);
512 dn->dn_bonus = NULL;
513 }
514 dn->dn_zio = NULL;
515
516 dn->dn_have_spill = B_FALSE;
517 dn->dn_oldused = 0;
518 dn->dn_oldflags = 0;
519 dn->dn_olduid = 0;
520 dn->dn_oldgid = 0;
521 dn->dn_newuid = 0;
522 dn->dn_newgid = 0;
523 dn->dn_id_flags = 0;
524 dn->dn_unlisted_l0_blkid = 0;
525
526 dmu_zfetch_rele(&dn->dn_zfetch);
527 kmem_cache_free(dnode_cache, dn);
528 arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
529
530 if (complete_os_eviction)
531 dmu_objset_evict_done(os);
532 }
533
534 void
535 dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
536 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
537 {
538 int i;
539
540 ASSERT3U(blocksize, <=,
541 spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
542 if (blocksize == 0)
543 blocksize = 1 << zfs_default_bs;
544 else
545 blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE);
546
547 if (ibs == 0)
548 ibs = zfs_default_ibs;
549
550 ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
551
978 zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */
979 mutex_exit(&os->os_lock);
980
981 return (KMEM_CBRC_YES);
982 }
983 #endif /* _KERNEL */
984
/*
 * Close a dnode opened with dnode_special_open(), destroying the dnode
 * and clearing the handle.  Blocks until all outstanding holds drain.
 */
void
dnode_special_close(dnode_handle_t *dnh)
{
	dnode_t *dn = dnh->dnh_dnode;

	/*
	 * Wait for final references to the dnode to clear. This can
	 * only happen if the arc is asynchronously evicting state that
	 * has a hold on this dnode while we are trying to evict this
	 * dnode.
	 */
	while (refcount_count(&dn->dn_holds) > 0)
		delay(1);	/* busy-wait until the last hold is dropped */
	/* A special dnode's backing dbuf (if any) must carry no user state. */
	ASSERT(dn->dn_dbuf == NULL ||
	    dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
	/* Acquire the handle; dnode_destroy() releases it implicitly. */
	zrl_add(&dnh->dnh_zrlock);
	dnode_destroy(dn); /* implicit zrl_remove() */
	zrl_destroy(&dnh->dnh_zrlock);
	dnh->dnh_dnode = NULL;
}
1005
1006 void
1007 dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
1008 dnode_handle_t *dnh)
1009 {
1010 dnode_t *dn;
1011
1012 dn = dnode_create(os, dnp, NULL, object, dnh);
1013 zrl_init(&dnh->dnh_zrlock);
1014 DNODE_VERIFY(dn);
1015 }
1016
/*
 * dbuf user-eviction callback for a dnode-array dbuf: destroy every
 * child dnode still attached to the dnode_children_t embedded at `dbu`
 * and free the children array itself.
 */
static void
dnode_buf_pageout(void *dbu)
{
	dnode_children_t *children_dnodes = dbu;
	int i;

	for (i = 0; i < children_dnodes->dnc_count; i++) {
		dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
		dnode_t *dn;

		/*
		 * The dnode handle lock guards against the dnode moving to
		 * another valid address, so there is no need here to guard
		 * against changes to or from NULL.
		 */
		if (dnh->dnh_dnode == NULL) {
			zrl_destroy(&dnh->dnh_zrlock);
			continue;
		}

		zrl_add(&dnh->dnh_zrlock);
		dn = dnh->dnh_dnode;
		/*
		 * If there are holds on this dnode, then there should
		 * be holds on the dnode's containing dbuf as well; thus
		 * it wouldn't be eligible for eviction and this function
		 * would not have been called.
		 */
		ASSERT(refcount_is_zero(&dn->dn_holds));
		ASSERT(refcount_is_zero(&dn->dn_tx_holds));

		dnode_destroy(dn); /* implicit zrl_remove() */
		zrl_destroy(&dnh->dnh_zrlock);
		dnh->dnh_dnode = NULL;
	}
	/* dnc_count is evaluated before the containing allocation is freed. */
	kmem_free(children_dnodes, sizeof (dnode_children_t) +
	    children_dnodes->dnc_count * sizeof (dnode_handle_t));
}
1055
1056 /*
1057 * errors:
1058 * EINVAL - invalid object number.
1059 * EIO - i/o error.
1060 * succeeds even for free dnodes.
1061 */
int
dnode_hold_impl(objset_t *os, uint64_t object, int flag,
    void *tag, dnode_t **dnp)
{
	int epb, idx, err;
	int drop_struct_lock = FALSE;
	int type;
	uint64_t blk;
	dnode_t *mdn, *dn;
	dmu_buf_impl_t *db;
	dnode_children_t *children_dnodes;
	dnode_handle_t *dnh;
	if (drop_struct_lock)
		rw_exit(&mdn->dn_struct_rwlock);
	if (db == NULL)
		return (SET_ERROR(EIO));
	/* Read in the block of dnodes containing the requested object. */
	err = dbuf_read(db, NULL, DB_RF_CANFAIL);
	if (err) {
		dbuf_rele(db, FTAG);
		return (err);
	}

	ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
	epb = db->db.db_size >> DNODE_SHIFT;	/* dnodes per block */

	idx = object & (epb-1);	/* object's index within its block */

	ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE);
	children_dnodes = dmu_buf_get_user(&db->db);
	if (children_dnodes == NULL) {
		/*
		 * First hold on this block of dnodes: allocate the child
		 * handle array and try to install it as the dbuf user.
		 */
		int i;
		dnode_children_t *winner;
		children_dnodes = kmem_zalloc(sizeof (dnode_children_t) +
		    epb * sizeof (dnode_handle_t), KM_SLEEP);
		children_dnodes->dnc_count = epb;
		dnh = &children_dnodes->dnc_children[0];
		for (i = 0; i < epb; i++) {
			zrl_init(&dnh[i].dnh_zrlock);
		}
		dmu_buf_init_user(&children_dnodes->dnc_dbu,
		    dnode_buf_pageout, NULL);
		winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
		if (winner != NULL) {

			/* Lost the race: discard our array, use the winner's. */
			for (i = 0; i < epb; i++) {
				zrl_destroy(&dnh[i].dnh_zrlock);
			}

			kmem_free(children_dnodes, sizeof (dnode_children_t) +
			    epb * sizeof (dnode_handle_t));
			children_dnodes = winner;
		}
	}
	ASSERT(children_dnodes->dnc_count == epb);

	dnh = &children_dnodes->dnc_children[idx];
	zrl_add(&dnh->dnh_zrlock);
	dn = dnh->dnh_dnode;
	if (dn == NULL) {
		dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;

		/*
		 * dnode_create() installs the dnode in dnh and resolves
		 * creation races internally, returning the winner.
		 */
		dn = dnode_create(os, phys, db, object, dnh);
	}

	mutex_enter(&dn->dn_mtx);
	type = dn->dn_type;
	if (dn->dn_free_txg ||
	    ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
	    ((flag & DNODE_MUST_BE_FREE) &&
	    (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
		mutex_exit(&dn->dn_mtx);
		zrl_remove(&dnh->dnh_zrlock);
		dbuf_rele(db, FTAG);
		return (type == DMU_OT_NONE ? ENOENT : EEXIST);
	}
	/* Take the hold while still under dn_mtx. */
	if (refcount_add(&dn->dn_holds, tag) == 1)
		dbuf_add_ref(db, dnh);
	mutex_exit(&dn->dn_mtx);

	/* Now we can rely on the hold to prevent the dnode from moving. */
	zrl_remove(&dnh->dnh_zrlock);

	DNODE_VERIFY(dn);
	ASSERT3P(dn->dn_dbuf, ==, db);
	ASSERT3U(dn->dn_object, ==, object);
	dbuf_rele(db, FTAG);

	*dnp = dn;
	return (0);
}
1195
1196 /*
1197 * Return held dnode if the object is allocated, NULL if not.
1198 */
1199 int
1200 dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
1201 {
1202 return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
1203 }
|