159
160 ASSERT(bpo->bpo_dbuf == NULL);
161 ASSERT(bpo->bpo_phys == NULL);
162 ASSERT(object != 0);
163 ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
164 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
165
166 err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
167 if (err)
168 return (err);
169
170 bpo->bpo_os = os;
171 bpo->bpo_object = object;
172 bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
173 bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
174 bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
175 bpo->bpo_phys = bpo->bpo_dbuf->db_data;
176 return (0);
177 }
178
179 boolean_t
180 bpobj_is_open(const bpobj_t *bpo)
181 {
182 return (bpo->bpo_object != 0);
183 }
184
185 void
186 bpobj_close(bpobj_t *bpo)
187 {
188 /* Lame workaround for closing a bpobj that was never opened. */
189 if (bpo->bpo_object == 0)
190 return;
191
192 dmu_buf_rele(bpo->bpo_dbuf, bpo);
193 if (bpo->bpo_cached_dbuf != NULL)
194 dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
195 bpo->bpo_dbuf = NULL;
196 bpo->bpo_phys = NULL;
197 bpo->bpo_cached_dbuf = NULL;
198 bpo->bpo_object = 0;
199
200 mutex_destroy(&bpo->bpo_lock);
201 }
202
203 boolean_t
204 bpobj_is_empty(bpobj_t *bpo)
205 {
206 return (bpo->bpo_phys->bpo_num_blkptrs == 0 &&
207 (!bpo->bpo_havesubobj || bpo->bpo_phys->bpo_num_subobjs == 0));
208 }
209
/*
 * Walk the entries of the bpobj under bpo_lock.  When "free" is set the
 * bpobj is dirtied in tx and visited entries are removed as we go (see
 * the bpobj_iterate()/bpobj_iterate_nofree() wrappers below).
 * Returns 0 on success or the first error encountered.
 *
 * NOTE(review): the middle of this function (the per-entry callback
 * invocation and the start of the subobj-handling section) is missing
 * from this extract; only comments were added here, code is unchanged.
 */
static int
bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
    boolean_t free)
{
    dmu_object_info_t doi;
    int epb;
    int64_t i;
    int err = 0;
    dmu_buf_t *dbuf = NULL;

    ASSERT(bpobj_is_open(bpo));
    mutex_enter(&bpo->bpo_lock);

    /* Removal mutates bpo_phys, so the bonus buffer must be dirtied. */
    if (free)
        dmu_buf_will_dirty(bpo->bpo_dbuf, tx);

    /* Walk the blkptr array from the last entry backwards. */
    for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
        blkptr_t *bparray;
        blkptr_t *bp;
        uint64_t offset, blkoff;

        offset = i * sizeof (blkptr_t);
        blkoff = P2PHASE(i, bpo->bpo_epb);

        /* Hold the data block covering this entry, reusing it when possible. */
        if (dbuf == NULL || dbuf->db_offset > offset) {
            if (dbuf)
                dmu_buf_rele(dbuf, FTAG);
            err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
                FTAG, &dbuf, 0);
            if (err)
                break;
        }

        /* (extract resumes inside the subobj-freeing loop) */
            err = dmu_object_free(bpo->bpo_os,
                objarray[blkoff], tx);
            if (err)
                break;
            bpo->bpo_phys->bpo_num_subobjs--;
            ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
        }
    }
    if (dbuf) {
        dmu_buf_rele(dbuf, FTAG);
        dbuf = NULL;
    }
    /* Free the on-disk tail of the subobj array past the surviving entries. */
    if (free) {
        VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
            bpo->bpo_phys->bpo_subobjs,
            (i + 1) * sizeof (uint64_t), -1ULL, tx));
    }

out:
    /* If there are no entries, there should be no bytes. */
    if (bpobj_is_empty(bpo)) {
        ASSERT0(bpo->bpo_phys->bpo_bytes);
        ASSERT0(bpo->bpo_phys->bpo_comp);
        ASSERT0(bpo->bpo_phys->bpo_uncomp);
    }

    mutex_exit(&bpo->bpo_lock);
    return (err);
}
361
362 /*
363 * Iterate and remove the entries. If func returns nonzero, iteration
364 * will stop and that entry will not be removed.
365 */
366 int
367 bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
368 {
369 return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
370 }
371
372 /*
373 * Iterate the entries. If func returns nonzero, iteration will stop.
374 */
375 int
376 bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
377 {
378 return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
379 }
380
/*
 * Attach an entire sub-bpobj (by object number) to this bpobj,
 * transferring ownership of "subobj" to bpo.  Empty subobjs are freed
 * instead of being stored, and the pool's shared empty bpobj is handled
 * specially via bpobj_decr_empty().
 *
 * NOTE(review): a portion of this function (the tail of the dmu_write()
 * call and the subsubobj merge logic) is missing from this extract;
 * only comments were added here, code is unchanged.
 */
void
bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
{
    bpobj_t subbpo;
    uint64_t used, comp, uncomp, subsubobjs;

    ASSERT(bpobj_is_open(bpo));
    ASSERT(subobj != 0);
    ASSERT(bpo->bpo_havesubobj);
    ASSERT(bpo->bpo_havecomp);
    /* The shared empty bpobj must never accumulate subobjs itself. */
    ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);

    if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) {
        bpobj_decr_empty(bpo->bpo_os, tx);
        return;
    }

    VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
    VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));

    if (bpobj_is_empty(&subbpo)) {
        /* No point in having an empty subobj. */
        bpobj_close(&subbpo);
        bpobj_free(bpo->bpo_os, subobj, tx);
        return;
    }

    mutex_enter(&bpo->bpo_lock);
    dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
    /* Lazily create the on-disk array that holds subobj object numbers. */
    if (bpo->bpo_phys->bpo_subobjs == 0) {
        bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
            DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE,
            DMU_OT_NONE, 0, tx);
    }

    dmu_object_info_t doi;
    ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
    ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);

    /* Append subobj's object number to the end of the subobj array. */
    dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
        bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
        /* (extract resumes inside the subsubobj handling) */
        VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os,
            subsubobjs, tx));
        }
    }
    /* Fold the subobj's space accounting into this bpobj's totals. */
    bpo->bpo_phys->bpo_bytes += used;
    bpo->bpo_phys->bpo_comp += comp;
    bpo->bpo_phys->bpo_uncomp += uncomp;
    mutex_exit(&bpo->bpo_lock);

    bpobj_close(&subbpo);
}
466
/*
 * Append a single block pointer to the bpobj, storing a scrubbed copy
 * (payload of embedded bps and checksums of non-dedup bps are dropped
 * so the object compresses better).
 */
void
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
{
    blkptr_t stored_bp = *bp;   /* local copy we may scrub before storing */
    uint64_t offset;
    int blkoff;
    blkptr_t *bparray;

    ASSERT(bpobj_is_open(bpo));
    ASSERT(!BP_IS_HOLE(bp));
    ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);

    if (BP_IS_EMBEDDED(bp)) {
        /*
         * The bpobj will compress better without the payload.
         *
         * Note that we store EMBEDDED bp's because they have an
         * uncompressed size, which must be accounted for. An
         * alternative would be to add their size to bpo_uncomp
         * without storing the bp, but that would create additional
         * complications: bpo_uncomp would be inconsistent with the
         * set of BP's stored, and bpobj_iterate() wouldn't visit
         * all the space accounted for in the bpobj.
         */
        bzero(&stored_bp, sizeof (stored_bp));
        stored_bp.blk_prop = bp->blk_prop;
        stored_bp.blk_birth = bp->blk_birth;
    } else if (!BP_GET_DEDUP(bp)) {
        /* The bpobj will compress better without the checksum */
        /*
         * NOTE(review): the remainder of this function is missing from
         * this extract (truncated); the code above is unchanged.
         */
541 /* ARGSUSED */
542 static int
543 space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
544 {
545 struct space_range_arg *sra = arg;
546
547 if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
548 if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
549 sra->used += bp_get_dsize_sync(sra->spa, bp);
550 else
551 sra->used += bp_get_dsize(sra->spa, bp);
552 sra->comp += BP_GET_PSIZE(bp);
553 sra->uncomp += BP_GET_UCSIZE(bp);
554 }
555 return (0);
556 }
557
558 int
559 bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
560 {
561 ASSERT(bpobj_is_open(bpo));
562 mutex_enter(&bpo->bpo_lock);
563
564 *usedp = bpo->bpo_phys->bpo_bytes;
565 if (bpo->bpo_havecomp) {
566 *compp = bpo->bpo_phys->bpo_comp;
567 *uncompp = bpo->bpo_phys->bpo_uncomp;
568 mutex_exit(&bpo->bpo_lock);
569 return (0);
570 } else {
571 mutex_exit(&bpo->bpo_lock);
572 return (bpobj_space_range(bpo, 0, UINT64_MAX,
573 usedp, compp, uncompp));
574 }
575 }
576
577 /*
578 * Return the amount of space in the bpobj which is:
579 * mintxg < blk_birth <= maxtxg
580 */
581 int
582 bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
583 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
584 {
585 struct space_range_arg sra = { 0 };
586 int err;
587
588 ASSERT(bpobj_is_open(bpo));
589
590 /*
591 * As an optimization, if they want the whole txg range, just
592 * get bpo_bytes rather than iterating over the bps.
593 */
594 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
595 return (bpobj_space(bpo, usedp, compp, uncompp));
596
597 sra.spa = dmu_objset_spa(bpo->bpo_os);
598 sra.mintxg = mintxg;
599 sra.maxtxg = maxtxg;
600
601 err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL);
602 *usedp = sra.used;
603 *compp = sra.comp;
604 *uncompp = sra.uncomp;
605 return (err);
606 }
|
159
160 ASSERT(bpo->bpo_dbuf == NULL);
161 ASSERT(bpo->bpo_phys == NULL);
162 ASSERT(object != 0);
163 ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
164 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
165
166 err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
167 if (err)
168 return (err);
169
170 bpo->bpo_os = os;
171 bpo->bpo_object = object;
172 bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
173 bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
174 bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
175 bpo->bpo_phys = bpo->bpo_dbuf->db_data;
176 return (0);
177 }
178
179 void
180 bpobj_close(bpobj_t *bpo)
181 {
182 /* Lame workaround for closing a bpobj that was never opened. */
183 if (bpo->bpo_object == 0)
184 return;
185
186 dmu_buf_rele(bpo->bpo_dbuf, bpo);
187 if (bpo->bpo_cached_dbuf != NULL)
188 dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
189 bpo->bpo_dbuf = NULL;
190 bpo->bpo_phys = NULL;
191 bpo->bpo_cached_dbuf = NULL;
192 bpo->bpo_object = 0;
193
194 mutex_destroy(&bpo->bpo_lock);
195 }
196
197 static boolean_t
198 bpobj_hasentries(bpobj_t *bpo)
199 {
200 return (bpo->bpo_phys->bpo_num_blkptrs != 0 ||
201 (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs != 0));
202 }
203
/*
 * Walk the entries of the bpobj under bpo_lock.  When "free" is set the
 * bpobj is dirtied in tx and visited entries are removed as we go (see
 * the bpobj_iterate()/bpobj_iterate_nofree() wrappers below).
 * Returns 0 on success or the first error encountered.
 *
 * NOTE(review): the middle of this function (the per-entry callback
 * invocation and the start of the subobj-handling section) is missing
 * from this extract; only comments were added here, code is unchanged.
 */
static int
bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
    boolean_t free)
{
    dmu_object_info_t doi;
    int epb;
    int64_t i;
    int err = 0;
    dmu_buf_t *dbuf = NULL;

    mutex_enter(&bpo->bpo_lock);

    /* Nothing to visit; still run the empty-bpobj assertions below. */
    if (!bpobj_hasentries(bpo))
        goto out;

    /* Removal mutates bpo_phys, so the bonus buffer must be dirtied. */
    if (free)
        dmu_buf_will_dirty(bpo->bpo_dbuf, tx);

    /* Walk the blkptr array from the last entry backwards. */
    for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
        blkptr_t *bparray;
        blkptr_t *bp;
        uint64_t offset, blkoff;

        offset = i * sizeof (blkptr_t);
        blkoff = P2PHASE(i, bpo->bpo_epb);

        /* Hold the data block covering this entry, reusing it when possible. */
        if (dbuf == NULL || dbuf->db_offset > offset) {
            if (dbuf)
                dmu_buf_rele(dbuf, FTAG);
            err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
                FTAG, &dbuf, 0);
            if (err)
                break;
        }

        /* (extract resumes inside the subobj-freeing loop) */
            err = dmu_object_free(bpo->bpo_os,
                objarray[blkoff], tx);
            if (err)
                break;
            bpo->bpo_phys->bpo_num_subobjs--;
            ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
        }
    }
    if (dbuf) {
        dmu_buf_rele(dbuf, FTAG);
        dbuf = NULL;
    }
    /* Free the on-disk tail of the subobj array past the surviving entries. */
    if (free) {
        VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
            bpo->bpo_phys->bpo_subobjs,
            (i + 1) * sizeof (uint64_t), -1ULL, tx));
    }

out:
    /* If there are no entries, there should be no bytes. */
    if (!bpobj_hasentries(bpo)) {
        ASSERT0(bpo->bpo_phys->bpo_bytes);
        ASSERT0(bpo->bpo_phys->bpo_comp);
        ASSERT0(bpo->bpo_phys->bpo_uncomp);
    }

    mutex_exit(&bpo->bpo_lock);
    return (err);
}
357
358 /*
359 * Iterate and remove the entries. If func returns nonzero, iteration
360 * will stop and that entry will not be removed.
361 */
362 int
363 bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
364 {
365 return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
366 }
367
368 /*
369 * Iterate the entries. If func returns nonzero, iteration will stop.
370 */
371 int
372 bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
373 {
374 return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
375 }
376
/*
 * Attach an entire sub-bpobj (by object number) to this bpobj,
 * transferring ownership of "subobj" to bpo.  Empty subobjs are freed
 * instead of being stored, and the pool's shared empty bpobj is handled
 * specially via bpobj_decr_empty().
 *
 * NOTE(review): a portion of this function (the tail of the dmu_write()
 * call and the subsubobj merge logic) is missing from this extract;
 * only comments were added here, code is unchanged.
 */
void
bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
{
    bpobj_t subbpo;
    uint64_t used, comp, uncomp, subsubobjs;

    ASSERT(bpo->bpo_havesubobj);
    ASSERT(bpo->bpo_havecomp);
    /* The shared empty bpobj must never accumulate subobjs itself. */
    ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);

    if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) {
        bpobj_decr_empty(bpo->bpo_os, tx);
        return;
    }

    VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
    VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));

    if (!bpobj_hasentries(&subbpo)) {
        /* No point in having an empty subobj. */
        bpobj_close(&subbpo);
        bpobj_free(bpo->bpo_os, subobj, tx);
        return;
    }

    mutex_enter(&bpo->bpo_lock);
    dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
    /* Lazily create the on-disk array that holds subobj object numbers. */
    if (bpo->bpo_phys->bpo_subobjs == 0) {
        bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
            DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE,
            DMU_OT_NONE, 0, tx);
    }

    dmu_object_info_t doi;
    ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
    ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);

    /* Append subobj's object number to the end of the subobj array. */
    dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
        bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
        /* (extract resumes inside the subsubobj handling) */
        VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os,
            subsubobjs, tx));
        }
    }
    /* Fold the subobj's space accounting into this bpobj's totals. */
    bpo->bpo_phys->bpo_bytes += used;
    bpo->bpo_phys->bpo_comp += comp;
    bpo->bpo_phys->bpo_uncomp += uncomp;
    mutex_exit(&bpo->bpo_lock);

    bpobj_close(&subbpo);
}
460
/*
 * Append a single block pointer to the bpobj, storing a scrubbed copy
 * (payload of embedded bps and checksums of non-dedup bps are dropped
 * so the object compresses better).
 */
void
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
{
    blkptr_t stored_bp = *bp;   /* local copy we may scrub before storing */
    uint64_t offset;
    int blkoff;
    blkptr_t *bparray;

    ASSERT(!BP_IS_HOLE(bp));
    ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);

    if (BP_IS_EMBEDDED(bp)) {
        /*
         * The bpobj will compress better without the payload.
         *
         * Note that we store EMBEDDED bp's because they have an
         * uncompressed size, which must be accounted for. An
         * alternative would be to add their size to bpo_uncomp
         * without storing the bp, but that would create additional
         * complications: bpo_uncomp would be inconsistent with the
         * set of BP's stored, and bpobj_iterate() wouldn't visit
         * all the space accounted for in the bpobj.
         */
        bzero(&stored_bp, sizeof (stored_bp));
        stored_bp.blk_prop = bp->blk_prop;
        stored_bp.blk_birth = bp->blk_birth;
    } else if (!BP_GET_DEDUP(bp)) {
        /* The bpobj will compress better without the checksum */
        /*
         * NOTE(review): the remainder of this function is missing from
         * this extract (truncated); the code above is unchanged.
         */
534 /* ARGSUSED */
535 static int
536 space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
537 {
538 struct space_range_arg *sra = arg;
539
540 if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
541 if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
542 sra->used += bp_get_dsize_sync(sra->spa, bp);
543 else
544 sra->used += bp_get_dsize(sra->spa, bp);
545 sra->comp += BP_GET_PSIZE(bp);
546 sra->uncomp += BP_GET_UCSIZE(bp);
547 }
548 return (0);
549 }
550
551 int
552 bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
553 {
554 mutex_enter(&bpo->bpo_lock);
555
556 *usedp = bpo->bpo_phys->bpo_bytes;
557 if (bpo->bpo_havecomp) {
558 *compp = bpo->bpo_phys->bpo_comp;
559 *uncompp = bpo->bpo_phys->bpo_uncomp;
560 mutex_exit(&bpo->bpo_lock);
561 return (0);
562 } else {
563 mutex_exit(&bpo->bpo_lock);
564 return (bpobj_space_range(bpo, 0, UINT64_MAX,
565 usedp, compp, uncompp));
566 }
567 }
568
569 /*
570 * Return the amount of space in the bpobj which is:
571 * mintxg < blk_birth <= maxtxg
572 */
573 int
574 bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
575 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
576 {
577 struct space_range_arg sra = { 0 };
578 int err;
579
580 /*
581 * As an optimization, if they want the whole txg range, just
582 * get bpo_bytes rather than iterating over the bps.
583 */
584 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
585 return (bpobj_space(bpo, usedp, compp, uncompp));
586
587 sra.spa = dmu_objset_spa(bpo->bpo_os);
588 sra.mintxg = mintxg;
589 sra.maxtxg = maxtxg;
590
591 err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL);
592 *usedp = sra.used;
593 *compp = sra.comp;
594 *uncompp = sra.uncomp;
595 return (err);
596 }
|