255 /* Only data blocks support the attachment of user data. */
256 ASSERT(db->db_level == 0);
257
258 /* Clients must resolve a dbuf before attaching user data. */
259 ASSERT(db->db.db_data != NULL);
260 ASSERT3U(db->db_state, ==, DB_CACHED);
261
262 holds = refcount_count(&db->db_holds);
263 if (verify_type == DBVU_EVICTING) {
264 /*
265 * Immediate eviction occurs when holds == dirtycnt.
266 * For normal eviction buffers, holds is zero on
267 * eviction, except when dbuf_fix_old_data() calls
268 * dbuf_clear_data(). However, the hold count can grow
269 * during eviction even though db_mtx is held (see
270 * dmu_bonus_hold() for an example), so we can only
271 * test the generic invariant that holds >= dirtycnt.
272 */
273 ASSERT3U(holds, >=, db->db_dirtycnt);
274 } else {
275 if (db->db_immediate_evict == TRUE)
276 ASSERT3U(holds, >=, db->db_dirtycnt);
277 else
278 ASSERT3U(holds, >, 0);
279 }
280 #endif
281 }
282
283 static void
284 dbuf_evict_user(dmu_buf_impl_t *db)
285 {
286 dmu_buf_user_t *dbu = db->db_user;
287
288 ASSERT(MUTEX_HELD(&db->db_mtx));
289
290 if (dbu == NULL)
291 return;
292
293 dbuf_verify_user(db, DBVU_EVICTING);
294 db->db_user = NULL;
295
1080 }
1081 mutex_exit(&db->db_mtx);
1082
1083 dnode_willuse_space(dn, size-osize, tx);
1084 DB_DNODE_EXIT(db);
1085 }
1086
/*
 * Release this dbuf's ARC buffer back to the anonymous state so it can
 * be rewritten.  May only be called from syncing context.
 */
void
dbuf_release_bp(dmu_buf_impl_t *db)
{
	objset_t *os = db->db_objset;

	/* Caller must be the pool's sync thread. */
	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
	/*
	 * Either the objset's own phys buffer has been released, or the
	 * dataset has already been linked onto the synced list this txg.
	 */
	ASSERT(arc_released(os->os_phys_buf) ||
	    list_link_active(&os->os_dsl_dataset->ds_synced_link));
	/* Any parent buffer must have been released first. */
	ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));

	(void) arc_release(db->db_buf, db);
}
1099
1100 dbuf_dirty_record_t *
1101 dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1102 {
1103 dnode_t *dn;
1104 objset_t *os;
1105 dbuf_dirty_record_t **drp, *dr;
1106 int drop_struct_lock = FALSE;
1107 boolean_t do_free_accounting = B_FALSE;
1108 int txgoff = tx->tx_txg & TXG_MASK;
1109
1110 ASSERT(tx->tx_txg != 0);
1111 ASSERT(!refcount_is_zero(&db->db_holds));
1112 DMU_TX_DIRTY_BUF(tx, db);
1113
1114 DB_DNODE_ENTER(db);
1115 dn = DB_DNODE(db);
1116 /*
1117 * Shouldn't dirty a regular buffer in syncing context. Private
1118 * objects may be dirtied in syncing context, but only if they
1119 * were already pre-dirtied in open context.
1152 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
1153 ASSERT(dn->dn_dirtyctx_firstset == NULL);
1154 dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
1155 }
1156 mutex_exit(&dn->dn_mtx);
1157
1158 if (db->db_blkid == DMU_SPILL_BLKID)
1159 dn->dn_have_spill = B_TRUE;
1160
1161 /*
1162 * If this buffer is already dirty, we're done.
1163 */
1164 drp = &db->db_last_dirty;
1165 ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg ||
1166 db->db.db_object == DMU_META_DNODE_OBJECT);
1167 while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
1168 drp = &dr->dr_next;
1169 if (dr && dr->dr_txg == tx->tx_txg) {
1170 DB_DNODE_EXIT(db);
1171
1172 if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
1173 /*
1174 * If this buffer has already been written out,
1175 * we now need to reset its state.
1176 */
1177 dbuf_unoverride(dr);
1178 if (db->db.db_object != DMU_META_DNODE_OBJECT &&
1179 db->db_state != DB_NOFILL)
1180 arc_buf_thaw(db->db_buf);
1181 }
1182 mutex_exit(&db->db_mtx);
1183 return (dr);
1184 }
1185
1186 /*
1187 * Only valid if not already dirty.
1188 */
1189 ASSERT(dn->dn_object == 0 ||
1190 dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
1191 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
1192
1193 ASSERT3U(dn->dn_nlevels, >, db->db_level);
1194 ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
1195 dn->dn_phys->dn_nlevels > db->db_level ||
1196 dn->dn_next_nlevels[txgoff] > db->db_level ||
1197 dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
1198 dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
1199
1200 /*
1201 * We should only be dirtying in syncing context if it's the
1471
1472 ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
1473 dbuf_clear_data(db);
1474 VERIFY(arc_buf_remove_ref(buf, db));
1475 dbuf_evict(db);
1476 return (B_TRUE);
1477 }
1478
1479 return (B_FALSE);
1480 }
1481
1482 void
1483 dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
1484 {
1485 dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
1486 int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
1487
1488 ASSERT(tx->tx_txg != 0);
1489 ASSERT(!refcount_is_zero(&db->db_holds));
1490
1491 DB_DNODE_ENTER(db);
1492 if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
1493 rf |= DB_RF_HAVESTRUCT;
1494 DB_DNODE_EXIT(db);
1495 (void) dbuf_read(db, NULL, rf);
1496 (void) dbuf_dirty(db, tx);
1497 }
1498
1499 void
1500 dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
1501 {
1502 dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
1503
1504 db->db_state = DB_NOFILL;
1505
1506 dmu_buf_will_fill(db_fake, tx);
1507 }
1508
1509 void
1510 dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
1794 {
1795 objset_t *os = dn->dn_objset;
1796 dmu_buf_impl_t *db, *odb;
1797
1798 ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
1799 ASSERT(dn->dn_type != DMU_OT_NONE);
1800
1801 db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
1802
1803 db->db_objset = os;
1804 db->db.db_object = dn->dn_object;
1805 db->db_level = level;
1806 db->db_blkid = blkid;
1807 db->db_last_dirty = NULL;
1808 db->db_dirtycnt = 0;
1809 db->db_dnode_handle = dn->dn_handle;
1810 db->db_parent = parent;
1811 db->db_blkptr = blkptr;
1812
1813 db->db_user = NULL;
1814 db->db_immediate_evict = 0;
1815 db->db_freed_in_flight = 0;
1816
1817 if (blkid == DMU_BONUS_BLKID) {
1818 ASSERT3P(parent, ==, dn->dn_dbuf);
1819 db->db.db_size = DN_MAX_BONUSLEN -
1820 (dn->dn_nblkptr-1) * sizeof (blkptr_t);
1821 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
1822 db->db.db_offset = DMU_BONUS_BLKID;
1823 db->db_state = DB_UNCACHED;
1824 /* the bonus dbuf is not placed in the hash table */
1825 arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1826 return (db);
1827 } else if (blkid == DMU_SPILL_BLKID) {
1828 db->db.db_size = (blkptr != NULL) ?
1829 BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE;
1830 db->db.db_offset = 0;
1831 } else {
1832 int blocksize =
1833 db->db_level ? 1 << dn->dn_indblkshift : dn->dn_datablksz;
1834 db->db.db_size = blocksize;
1835 db->db.db_offset = db->db_blkid * blocksize;
2187
2188 ASSERT(MUTEX_HELD(&db->db_mtx));
2189 DBUF_VERIFY(db);
2190
2191 /*
2192 * Remove the reference to the dbuf before removing its hold on the
2193 * dnode so we can guarantee in dnode_move() that a referenced bonus
2194 * buffer has a corresponding dnode hold.
2195 */
2196 holds = refcount_remove(&db->db_holds, tag);
2197 ASSERT(holds >= 0);
2198
2199 /*
2200 * We can't freeze indirects if there is a possibility that they
2201 * may be modified in the current syncing context.
2202 */
2203 if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
2204 arc_buf_freeze(db->db_buf);
2205
2206 if (holds == db->db_dirtycnt &&
2207 db->db_level == 0 && db->db_immediate_evict)
2208 dbuf_evict_user(db);
2209
2210 if (holds == 0) {
2211 if (db->db_blkid == DMU_BONUS_BLKID) {
2212 dnode_t *dn;
2213
2214 /*
2215 * If the dnode moves here, we cannot cross this
2216 * barrier until the move completes.
2217 */
2218 DB_DNODE_ENTER(db);
2219
2220 dn = DB_DNODE(db);
2221 atomic_dec_32(&dn->dn_dbufs_count);
2222
2223 /*
2224 * Decrementing the dbuf count means that the bonus
2225 * buffer's dnode hold is no longer discounted in
2226 * dnode_move(). The dnode cannot move until after
2227 * the dnode_rele_and_unlock() below.
2228 */
2229 DB_DNODE_EXIT(db);
2230
2231 /*
2232 * Do not reference db after its lock is dropped.
2233 * Another thread may evict it.
2234 */
2235 mutex_exit(&db->db_mtx);
2236
2237 /*
2238 * If the dnode has been freed, evict the bonus
2239 * buffer immediately. The data in the bonus
2240 * buffer is no longer relevant and this prevents
2241 * a stale bonus buffer from being associated
2242 * with this dnode_t should the dnode_t be reused
2243 * prior to being destroyed.
2244 */
2245 mutex_enter(&dn->dn_mtx);
2246 if (dn->dn_type == DMU_OT_NONE ||
2247 dn->dn_free_txg != 0) {
2248 /*
2249 * Drop dn_mtx. It is a leaf lock and
2250 * cannot be held when dnode_evict_bonus()
2251 * acquires other locks in order to
2252 * perform the eviction.
2253 *
2254 * Freed dnodes cannot be reused until the
2255 * last hold is released. Since this bonus
2256 * buffer has a hold, the dnode will remain
2257 * in the free state, even without dn_mtx
2258 * held, until the dnode_rele_and_unlock()
2259 * below.
2260 */
2261 mutex_exit(&dn->dn_mtx);
2262 dnode_evict_bonus(dn);
2263 mutex_enter(&dn->dn_mtx);
2264 }
2265 dnode_rele_and_unlock(dn, db);
2266 } else if (db->db_buf == NULL) {
2267 /*
2268 * This is a special case: we never associated this
2269 * dbuf with any data allocated from the ARC.
2270 */
2271 ASSERT(db->db_state == DB_UNCACHED ||
2272 db->db_state == DB_NOFILL);
2273 dbuf_evict(db);
2274 } else if (arc_released(db->db_buf)) {
2275 arc_buf_t *buf = db->db_buf;
2276 /*
2277 * This dbuf has anonymous data associated with it.
2278 */
2279 dbuf_clear_data(db);
2280 VERIFY(arc_buf_remove_ref(buf, db));
2281 dbuf_evict(db);
2282 } else {
2283 VERIFY(!arc_buf_remove_ref(db->db_buf, db));
2284
2285 /*
2292 * criteria set in the property.
2293 *
2294 * To decide if our buffer is considered a
2295 * duplicate, we must call into the arc to determine
2296 * if multiple buffers are referencing the same
2297 * block on-disk. If so, then we simply evict
2298 * ourselves.
2299 */
2300 if (!DBUF_IS_CACHEABLE(db)) {
2301 if (db->db_blkptr != NULL &&
2302 !BP_IS_HOLE(db->db_blkptr) &&
2303 !BP_IS_EMBEDDED(db->db_blkptr)) {
2304 spa_t *spa =
2305 dmu_objset_spa(db->db_objset);
2306 blkptr_t bp = *db->db_blkptr;
2307 dbuf_clear(db);
2308 arc_freed(spa, &bp);
2309 } else {
2310 dbuf_clear(db);
2311 }
2312 } else if (db->db_objset->os_evicting ||
2313 arc_buf_eviction_needed(db->db_buf)) {
2314 dbuf_clear(db);
2315 } else {
2316 mutex_exit(&db->db_mtx);
2317 }
2318 }
2319 } else {
2320 mutex_exit(&db->db_mtx);
2321 }
2322 }
2323
/* Return the current number of holds on this dbuf. */
#pragma weak dmu_buf_refcount = dbuf_refcount
uint64_t
dbuf_refcount(dmu_buf_impl_t *db)
{
	return (refcount_count(&db->db_holds));
}
2330
2331 void *
2332 dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
2340 db->db_user = new_user;
2341 else
2342 old_user = db->db_user;
2343 dbuf_verify_user(db, DBVU_NOT_EVICTING);
2344 mutex_exit(&db->db_mtx);
2345
2346 return (old_user);
2347 }
2348
/*
 * Attach user data to this dbuf: installs `user` via
 * dmu_buf_replace_user() with an expected previous user of NULL, and
 * returns whatever previous user dmu_buf_replace_user() reports.
 */
void *
dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
	return (dmu_buf_replace_user(db_fake, NULL, user));
}
2354
2355 void *
2356 dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
2357 {
2358 dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
2359
2360 db->db_immediate_evict = TRUE;
2361 return (dmu_buf_set_user(db_fake, user));
2362 }
2363
/*
 * Detach `user` from this dbuf via dmu_buf_replace_user(), replacing
 * it with NULL; returns the previous user as reported by
 * dmu_buf_replace_user().
 */
void *
dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
	return (dmu_buf_replace_user(db_fake, user, NULL));
}
2369
/* Return the user data currently attached to this dbuf (may be NULL). */
void *
dmu_buf_get_user(dmu_buf_t *db_fake)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;

	/* Debug-verify the user-data invariants before handing it out. */
	dbuf_verify_user(db, DBVU_NOT_EVICTING);
	return (db->db_user);
}
2378
2379 void
2380 dmu_buf_user_evict_wait()
|
255 /* Only data blocks support the attachment of user data. */
256 ASSERT(db->db_level == 0);
257
258 /* Clients must resolve a dbuf before attaching user data. */
259 ASSERT(db->db.db_data != NULL);
260 ASSERT3U(db->db_state, ==, DB_CACHED);
261
262 holds = refcount_count(&db->db_holds);
263 if (verify_type == DBVU_EVICTING) {
264 /*
265 * Immediate eviction occurs when holds == dirtycnt.
266 * For normal eviction buffers, holds is zero on
267 * eviction, except when dbuf_fix_old_data() calls
268 * dbuf_clear_data(). However, the hold count can grow
269 * during eviction even though db_mtx is held (see
270 * dmu_bonus_hold() for an example), so we can only
271 * test the generic invariant that holds >= dirtycnt.
272 */
273 ASSERT3U(holds, >=, db->db_dirtycnt);
274 } else {
275 if (db->db_user_immediate_evict == TRUE)
276 ASSERT3U(holds, >=, db->db_dirtycnt);
277 else
278 ASSERT3U(holds, >, 0);
279 }
280 #endif
281 }
282
283 static void
284 dbuf_evict_user(dmu_buf_impl_t *db)
285 {
286 dmu_buf_user_t *dbu = db->db_user;
287
288 ASSERT(MUTEX_HELD(&db->db_mtx));
289
290 if (dbu == NULL)
291 return;
292
293 dbuf_verify_user(db, DBVU_EVICTING);
294 db->db_user = NULL;
295
1080 }
1081 mutex_exit(&db->db_mtx);
1082
1083 dnode_willuse_space(dn, size-osize, tx);
1084 DB_DNODE_EXIT(db);
1085 }
1086
/*
 * Release this dbuf's ARC buffer back to the anonymous state so it can
 * be rewritten.  May only be called from syncing context.
 */
void
dbuf_release_bp(dmu_buf_impl_t *db)
{
	objset_t *os = db->db_objset;

	/* Caller must be the pool's sync thread. */
	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
	/*
	 * Either the objset's own phys buffer has been released, or the
	 * dataset has already been linked onto the synced list this txg.
	 */
	ASSERT(arc_released(os->os_phys_buf) ||
	    list_link_active(&os->os_dsl_dataset->ds_synced_link));
	/* Any parent buffer must have been released first. */
	ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));

	(void) arc_release(db->db_buf, db);
}
1099
1100 /*
1101 * We already have a dirty record for this TXG, and we are being
1102 * dirtied again.
1103 */
1104 static void
1105 dbuf_redirty(dbuf_dirty_record_t *dr)
1106 {
1107 dmu_buf_impl_t *db = dr->dr_dbuf;
1108
1109 ASSERT(MUTEX_HELD(&db->db_mtx));
1110
1111 if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
1112 /*
1113 * If this buffer has already been written out,
1114 * we now need to reset its state.
1115 */
1116 dbuf_unoverride(dr);
1117 if (db->db.db_object != DMU_META_DNODE_OBJECT &&
1118 db->db_state != DB_NOFILL) {
1119 /* Already released on initial dirty, so just thaw. */
1120 ASSERT(arc_released(db->db_buf));
1121 arc_buf_thaw(db->db_buf);
1122 }
1123 }
1124 }
1125
1126 dbuf_dirty_record_t *
1127 dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1128 {
1129 dnode_t *dn;
1130 objset_t *os;
1131 dbuf_dirty_record_t **drp, *dr;
1132 int drop_struct_lock = FALSE;
1133 boolean_t do_free_accounting = B_FALSE;
1134 int txgoff = tx->tx_txg & TXG_MASK;
1135
1136 ASSERT(tx->tx_txg != 0);
1137 ASSERT(!refcount_is_zero(&db->db_holds));
1138 DMU_TX_DIRTY_BUF(tx, db);
1139
1140 DB_DNODE_ENTER(db);
1141 dn = DB_DNODE(db);
1142 /*
1143 * Shouldn't dirty a regular buffer in syncing context. Private
1144 * objects may be dirtied in syncing context, but only if they
1145 * were already pre-dirtied in open context.
1178 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
1179 ASSERT(dn->dn_dirtyctx_firstset == NULL);
1180 dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
1181 }
1182 mutex_exit(&dn->dn_mtx);
1183
1184 if (db->db_blkid == DMU_SPILL_BLKID)
1185 dn->dn_have_spill = B_TRUE;
1186
1187 /*
1188 * If this buffer is already dirty, we're done.
1189 */
1190 drp = &db->db_last_dirty;
1191 ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg ||
1192 db->db.db_object == DMU_META_DNODE_OBJECT);
1193 while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
1194 drp = &dr->dr_next;
1195 if (dr && dr->dr_txg == tx->tx_txg) {
1196 DB_DNODE_EXIT(db);
1197
1198 dbuf_redirty(dr);
1199 mutex_exit(&db->db_mtx);
1200 return (dr);
1201 }
1202
1203 /*
1204 * Only valid if not already dirty.
1205 */
1206 ASSERT(dn->dn_object == 0 ||
1207 dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
1208 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
1209
1210 ASSERT3U(dn->dn_nlevels, >, db->db_level);
1211 ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
1212 dn->dn_phys->dn_nlevels > db->db_level ||
1213 dn->dn_next_nlevels[txgoff] > db->db_level ||
1214 dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
1215 dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
1216
1217 /*
1218 * We should only be dirtying in syncing context if it's the
1488
1489 ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
1490 dbuf_clear_data(db);
1491 VERIFY(arc_buf_remove_ref(buf, db));
1492 dbuf_evict(db);
1493 return (B_TRUE);
1494 }
1495
1496 return (B_FALSE);
1497 }
1498
1499 void
1500 dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
1501 {
1502 dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
1503 int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
1504
1505 ASSERT(tx->tx_txg != 0);
1506 ASSERT(!refcount_is_zero(&db->db_holds));
1507
1508 /*
1509 * Quick check for dirtyness. For already dirty blocks, this
1510 * reduces runtime of this function by >90%, and overall performance
1511 * by 50% for some workloads (e.g. file deletion with indirect blocks
1512 * cached).
1513 */
1514 mutex_enter(&db->db_mtx);
1515 dbuf_dirty_record_t *dr;
1516 for (dr = db->db_last_dirty;
1517 dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) {
1518 /*
1519 * It's possible that it is already dirty but not cached,
1520 * because there are some calls to dbuf_dirty() that don't
1521 * go through dmu_buf_will_dirty().
1522 */
1523 if (dr->dr_txg == tx->tx_txg && db->db_state == DB_CACHED) {
1524 /* This dbuf is already dirty and cached. */
1525 dbuf_redirty(dr);
1526 mutex_exit(&db->db_mtx);
1527 return;
1528 }
1529 }
1530 mutex_exit(&db->db_mtx);
1531
1532 DB_DNODE_ENTER(db);
1533 if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
1534 rf |= DB_RF_HAVESTRUCT;
1535 DB_DNODE_EXIT(db);
1536 (void) dbuf_read(db, NULL, rf);
1537 (void) dbuf_dirty(db, tx);
1538 }
1539
1540 void
1541 dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
1542 {
1543 dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
1544
1545 db->db_state = DB_NOFILL;
1546
1547 dmu_buf_will_fill(db_fake, tx);
1548 }
1549
1550 void
1551 dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
1835 {
1836 objset_t *os = dn->dn_objset;
1837 dmu_buf_impl_t *db, *odb;
1838
1839 ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
1840 ASSERT(dn->dn_type != DMU_OT_NONE);
1841
1842 db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
1843
1844 db->db_objset = os;
1845 db->db.db_object = dn->dn_object;
1846 db->db_level = level;
1847 db->db_blkid = blkid;
1848 db->db_last_dirty = NULL;
1849 db->db_dirtycnt = 0;
1850 db->db_dnode_handle = dn->dn_handle;
1851 db->db_parent = parent;
1852 db->db_blkptr = blkptr;
1853
1854 db->db_user = NULL;
1855 db->db_user_immediate_evict = FALSE;
1856 db->db_freed_in_flight = FALSE;
1857 db->db_pending_evict = FALSE;
1858
1859 if (blkid == DMU_BONUS_BLKID) {
1860 ASSERT3P(parent, ==, dn->dn_dbuf);
1861 db->db.db_size = DN_MAX_BONUSLEN -
1862 (dn->dn_nblkptr-1) * sizeof (blkptr_t);
1863 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
1864 db->db.db_offset = DMU_BONUS_BLKID;
1865 db->db_state = DB_UNCACHED;
1866 /* the bonus dbuf is not placed in the hash table */
1867 arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1868 return (db);
1869 } else if (blkid == DMU_SPILL_BLKID) {
1870 db->db.db_size = (blkptr != NULL) ?
1871 BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE;
1872 db->db.db_offset = 0;
1873 } else {
1874 int blocksize =
1875 db->db_level ? 1 << dn->dn_indblkshift : dn->dn_datablksz;
1876 db->db.db_size = blocksize;
1877 db->db.db_offset = db->db_blkid * blocksize;
2229
2230 ASSERT(MUTEX_HELD(&db->db_mtx));
2231 DBUF_VERIFY(db);
2232
2233 /*
2234 * Remove the reference to the dbuf before removing its hold on the
2235 * dnode so we can guarantee in dnode_move() that a referenced bonus
2236 * buffer has a corresponding dnode hold.
2237 */
2238 holds = refcount_remove(&db->db_holds, tag);
2239 ASSERT(holds >= 0);
2240
2241 /*
2242 * We can't freeze indirects if there is a possibility that they
2243 * may be modified in the current syncing context.
2244 */
2245 if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
2246 arc_buf_freeze(db->db_buf);
2247
2248 if (holds == db->db_dirtycnt &&
2249 db->db_level == 0 && db->db_user_immediate_evict)
2250 dbuf_evict_user(db);
2251
2252 if (holds == 0) {
2253 if (db->db_blkid == DMU_BONUS_BLKID) {
2254 dnode_t *dn;
2255 boolean_t evict_dbuf = db->db_pending_evict;
2256
2257 /*
2258 * If the dnode moves here, we cannot cross this
2259 * barrier until the move completes.
2260 */
2261 DB_DNODE_ENTER(db);
2262
2263 dn = DB_DNODE(db);
2264 atomic_dec_32(&dn->dn_dbufs_count);
2265
2266 /*
2267 * Decrementing the dbuf count means that the bonus
2268 * buffer's dnode hold is no longer discounted in
2269 * dnode_move(). The dnode cannot move until after
2270 * the dnode_rele() below.
2271 */
2272 DB_DNODE_EXIT(db);
2273
2274 /*
2275 * Do not reference db after its lock is dropped.
2276 * Another thread may evict it.
2277 */
2278 mutex_exit(&db->db_mtx);
2279
2280 if (evict_dbuf)
2281 dnode_evict_bonus(dn);
2282
2283 dnode_rele(dn, db);
2284 } else if (db->db_buf == NULL) {
2285 /*
2286 * This is a special case: we never associated this
2287 * dbuf with any data allocated from the ARC.
2288 */
2289 ASSERT(db->db_state == DB_UNCACHED ||
2290 db->db_state == DB_NOFILL);
2291 dbuf_evict(db);
2292 } else if (arc_released(db->db_buf)) {
2293 arc_buf_t *buf = db->db_buf;
2294 /*
2295 * This dbuf has anonymous data associated with it.
2296 */
2297 dbuf_clear_data(db);
2298 VERIFY(arc_buf_remove_ref(buf, db));
2299 dbuf_evict(db);
2300 } else {
2301 VERIFY(!arc_buf_remove_ref(db->db_buf, db));
2302
2303 /*
2310 * criteria set in the property.
2311 *
2312 * To decide if our buffer is considered a
2313 * duplicate, we must call into the arc to determine
2314 * if multiple buffers are referencing the same
2315 * block on-disk. If so, then we simply evict
2316 * ourselves.
2317 */
2318 if (!DBUF_IS_CACHEABLE(db)) {
2319 if (db->db_blkptr != NULL &&
2320 !BP_IS_HOLE(db->db_blkptr) &&
2321 !BP_IS_EMBEDDED(db->db_blkptr)) {
2322 spa_t *spa =
2323 dmu_objset_spa(db->db_objset);
2324 blkptr_t bp = *db->db_blkptr;
2325 dbuf_clear(db);
2326 arc_freed(spa, &bp);
2327 } else {
2328 dbuf_clear(db);
2329 }
2330 } else if (db->db_pending_evict ||
2331 arc_buf_eviction_needed(db->db_buf)) {
2332 dbuf_clear(db);
2333 } else {
2334 mutex_exit(&db->db_mtx);
2335 }
2336 }
2337 } else {
2338 mutex_exit(&db->db_mtx);
2339 }
2340 }
2341
/* Return the current number of holds on this dbuf. */
#pragma weak dmu_buf_refcount = dbuf_refcount
uint64_t
dbuf_refcount(dmu_buf_impl_t *db)
{
	return (refcount_count(&db->db_holds));
}
2348
2349 void *
2350 dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
2358 db->db_user = new_user;
2359 else
2360 old_user = db->db_user;
2361 dbuf_verify_user(db, DBVU_NOT_EVICTING);
2362 mutex_exit(&db->db_mtx);
2363
2364 return (old_user);
2365 }
2366
/*
 * Attach user data to this dbuf: installs `user` via
 * dmu_buf_replace_user() with an expected previous user of NULL, and
 * returns whatever previous user dmu_buf_replace_user() reports.
 */
void *
dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
	return (dmu_buf_replace_user(db_fake, NULL, user));
}
2372
2373 void *
2374 dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
2375 {
2376 dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
2377
2378 db->db_user_immediate_evict = TRUE;
2379 return (dmu_buf_set_user(db_fake, user));
2380 }
2381
/*
 * Detach `user` from this dbuf via dmu_buf_replace_user(), replacing
 * it with NULL; returns the previous user as reported by
 * dmu_buf_replace_user().
 */
void *
dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
	return (dmu_buf_replace_user(db_fake, user, NULL));
}
2387
/* Return the user data currently attached to this dbuf (may be NULL). */
void *
dmu_buf_get_user(dmu_buf_t *db_fake)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;

	/* Debug-verify the user-data invariants before handing it out. */
	dbuf_verify_user(db, DBVU_NOT_EVICTING);
	return (db->db_user);
}
2396
2397 void
2398 dmu_buf_user_evict_wait()
|