350 kmem_free(mze, sizeof (mzap_ent_t));
351 }
352
353 static void
354 mze_destroy(zap_t *zap)
355 {
356 mzap_ent_t *mze;
357 void *avlcookie = NULL;
358
359 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
360 kmem_free(mze, sizeof (mzap_ent_t));
361 avl_destroy(&zap->zap_m.zap_avl);
362 }
363
/*
 * Build the in-core zap_t for the ZAP object whose block is held in db and
 * attach it to db as that buffer's user.
 *
 *   os, obj - recorded in the new zap_t as zap_objset / zap_object.
 *   db      - held buffer; its first 64-bit word selects micro vs. fat
 *             handling and its size determines the chunk count / block shift.
 *
 * Returns the zap_t that ends up attached to db: either the one created
 * here, or the non-NULL value returned by dmu_buf_set_user(), in which case
 * the local copy is torn down and freed.  The zap_rwlock taken during setup
 * is released before returning.
 */
364 static zap_t *
365 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
366 {
367 zap_t *winner;
368 zap_t *zap;
369 int i;
370
/* The on-disk micro-ZAP entry must be exactly MZAP_ENT_LEN bytes. */
371 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
372
/* Allocate a zeroed zap_t and hold its rwlock as writer during setup. */
373 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
374 rw_init(&zap->zap_rwlock, 0, 0, 0);
375 rw_enter(&zap->zap_rwlock, RW_WRITER);
376 zap->zap_objset = os;
377 zap->zap_object = obj;
378 zap->zap_dbuf = db;
379
/*
 * ZBT_MICRO in the first 64-bit word means micro-ZAP; any other value is
 * handled as a fat ZAP.
 * NOTE(review): the block type is not validated any further here, so an
 * unexpected value is also treated as fat -- confirm this is intended.
 */
380 if (*(uint64_t *)db->db_data != ZBT_MICRO) {
381 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
382 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
383 } else {
384 zap->zap_ismicro = TRUE;
385 }
386
387 /*
388 * Make sure that zap_ismicro is set before we let others see
389 * it, because zap_lockdir() checks zap_ismicro without the lock
390 * held.
391 */
392 dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
393 winner = dmu_buf_set_user(db, &zap->zap_dbu);
394
/*
 * A non-NULL result is returned to the caller in place of our zap_t;
 * presumably it is a user already attached to db (e.g. by a racing
 * opener) -- confirm against dmu_buf_set_user().  Undo everything that
 * was initialized above before freeing the local copy.
 */
395 if (winner != NULL) {
396 rw_exit(&zap->zap_rwlock);
397 rw_destroy(&zap->zap_rwlock);
398 if (!zap->zap_ismicro)
399 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
400 kmem_free(zap, sizeof (zap_t));
401 return (winner);
402 }
403
/*
 * Micro-ZAP: cache the salt and normalization flags from the on-disk
 * header, create the AVL tree, and pass every chunk with a non-empty
 * name to mze_insert() along with its name hash.
 */
404 if (zap->zap_ismicro) {
405 zap->zap_salt = zap_m_phys(zap)->mz_salt;
406 zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
/* The "- 1" presumably excludes the header's own slot -- TODO confirm. */
407 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
408 avl_create(&zap->zap_m.zap_avl, mze_compare,
409 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
410
411 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
412 mzap_ent_phys_t *mze =
413 &zap_m_phys(zap)->mz_chunk[i];
414 if (mze->mze_name[0]) {
415 zap_name_t *zn;
416
417 zap->zap_m.zap_num_entries++;
418 zn = zap_name_alloc(zap, mze->mze_name,
419 MT_EXACT);
420 mze_insert(zap, i, zn->zn_hash);
421 zap_name_free(zn);
422 }
/*
 * NOTE(review): this listing skips several source lines here; the end of
 * the micro-ZAP branch and the start of the following branch are not
 * visible.  What follows are layout assertions on the fat-ZAP header.
 */
429 2*ZAP_LEAF_CHUNKSIZE);
430
431 /*
432 * The embedded pointer table should not overlap the
433 * other members.
434 */
435 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
436 &zap_f_phys(zap)->zap_salt);
437
438 /*
439 * The embedded pointer table should end at the end of
440 * the block
441 */
442 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
443 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
444 (uintptr_t)zap_f_phys(zap), ==,
445 zap->zap_dbuf->db_size);
446 }
/* Setup is complete: drop the writer lock taken at the top. */
447 rw_exit(&zap->zap_rwlock);
448 return (zap);
449 }
450
451 int
452 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
453 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
454 {
455 zap_t *zap;
456 dmu_buf_t *db;
457 krw_t lt;
458 int err;
459
460 *zapp = NULL;
461
462 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
463 if (err)
464 return (err);
465
466 #ifdef ZFS_DEBUG
467 {
468 dmu_object_info_t doi;
469 dmu_object_info_from_db(db, &doi);
470 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
471 }
472 #endif
473
474 zap = dmu_buf_get_user(db);
475 if (zap == NULL)
476 zap = mzap_open(os, obj, db);
477
478 /*
479 * We're checking zap_ismicro without the lock held, in order to
480 * tell what type of lock we want. Once we have some sort of
481 * lock, see if it really is the right type. In practice this
482 * can only be different if it was upgraded from micro to fat,
483 * and micro wanted WRITER but fat only needs READER.
484 */
485 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
486 rw_enter(&zap->zap_rwlock, lt);
487 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
488 /* it was upgraded, now we only need reader */
489 ASSERT(lt == RW_WRITER);
490 ASSERT(RW_READER ==
491 (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
492 rw_downgrade(&zap->zap_rwlock);
493 lt = RW_READER;
494 }
495
496 zap->zap_objset = os;
|
350 kmem_free(mze, sizeof (mzap_ent_t));
351 }
352
353 static void
354 mze_destroy(zap_t *zap)
355 {
356 mzap_ent_t *mze;
357 void *avlcookie = NULL;
358
359 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
360 kmem_free(mze, sizeof (mzap_ent_t));
361 avl_destroy(&zap->zap_m.zap_avl);
362 }
363
/*
 * Build the in-core zap_t for the ZAP object whose block is held in db and
 * attach it to db as that buffer's user.
 *
 *   os, obj - recorded in the new zap_t as zap_objset / zap_object.
 *   db      - held buffer; the first two 64-bit words of db->db_data are
 *             read as the block type and magic number.
 *
 * Returns:
 *   - the newly created zap_t on success;
 *   - the non-NULL value returned by dmu_buf_set_user(), in which case the
 *     local copy is torn down and freed;
 *   - NULL when the block is not ZBT_MICRO and its header is not
 *     (ZBT_HEADER, ZAP_MAGIC); nothing is attached to db in that case.
 *
 * The zap_rwlock taken during setup is released on every return path.
 */
364 static zap_t *
365 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
366 {
367 zap_t *winner;
368 zap_t *zap;
369 int i;
/* Snapshot the two leading header words used for the checks below. */
370 uint64_t *zap_hdr = (uint64_t *)db->db_data;
371 uint64_t zap_block_type = zap_hdr[0];
372 uint64_t zap_magic = zap_hdr[1];
373
/* The on-disk micro-ZAP entry must be exactly MZAP_ENT_LEN bytes. */
374 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
375
/* Allocate a zeroed zap_t and hold its rwlock as writer during setup. */
376 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
377 rw_init(&zap->zap_rwlock, 0, 0, 0);
378 rw_enter(&zap->zap_rwlock, RW_WRITER);
379 zap->zap_objset = os;
380 zap->zap_object = obj;
381 zap->zap_dbuf = db;
382
/*
 * Anything that is not a micro-ZAP must carry a valid fat-ZAP header.
 * The mutex is initialized before the check so that the shared teardown
 * at handle_winner (which destroys it when !zap_ismicro) stays balanced.
 */
383 if (zap_block_type != ZBT_MICRO) {
384 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
385 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
386 if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
387 winner = NULL; /* No actual winner here... */
388 goto handle_winner;
389 }
390 } else {
391 zap->zap_ismicro = TRUE;
392 }
393
394 /*
395 * Make sure that zap_ismicro is set before we let others see
396 * it, because zap_lockdir() checks zap_ismicro without the lock
397 * held.
398 */
399 dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
400 winner = dmu_buf_set_user(db, &zap->zap_dbu);
401
/*
 * A non-NULL result is returned to the caller in place of our zap_t;
 * presumably it is a user already attached to db (e.g. by a racing
 * opener) -- confirm against dmu_buf_set_user().
 */
402 if (winner != NULL)
403 goto handle_winner;
404
/*
 * Micro-ZAP: cache the salt and normalization flags from the on-disk
 * header, create the AVL tree, and pass every chunk with a non-empty
 * name to mze_insert() along with its name hash.
 */
405 if (zap->zap_ismicro) {
406 zap->zap_salt = zap_m_phys(zap)->mz_salt;
407 zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
/* The "- 1" presumably excludes the header's own slot -- TODO confirm. */
408 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
409 avl_create(&zap->zap_m.zap_avl, mze_compare,
410 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
411
412 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
413 mzap_ent_phys_t *mze =
414 &zap_m_phys(zap)->mz_chunk[i];
415 if (mze->mze_name[0]) {
416 zap_name_t *zn;
417
418 zap->zap_m.zap_num_entries++;
419 zn = zap_name_alloc(zap, mze->mze_name,
420 MT_EXACT);
421 mze_insert(zap, i, zn->zn_hash);
422 zap_name_free(zn);
423 }
/*
 * NOTE(review): this listing skips several source lines here; the end of
 * the micro-ZAP branch and the start of the following branch are not
 * visible.  What follows are layout assertions on the fat-ZAP header.
 */
430 2*ZAP_LEAF_CHUNKSIZE);
431
432 /*
433 * The embedded pointer table should not overlap the
434 * other members.
435 */
436 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
437 &zap_f_phys(zap)->zap_salt);
438
439 /*
440 * The embedded pointer table should end at the end of
441 * the block
442 */
443 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
444 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
445 (uintptr_t)zap_f_phys(zap), ==,
446 zap->zap_dbuf->db_size);
447 }
/* Setup is complete: drop the writer lock taken at the top. */
448 rw_exit(&zap->zap_rwlock);
449 return (zap);
450
/*
 * Shared teardown for both "someone else's zap_t is attached" and "bad
 * on-disk header" (winner == NULL): release and destroy the locks that
 * were initialized above, free our zap_t, and return winner.
 */
451 handle_winner:
452 rw_exit(&zap->zap_rwlock);
453 rw_destroy(&zap->zap_rwlock);
454 if (!zap->zap_ismicro)
455 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
456 kmem_free(zap, sizeof (zap_t));
457 return (winner);
458 }
459
460 int
461 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
462 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
463 {
464 zap_t *zap;
465 dmu_buf_t *db;
466 krw_t lt;
467 int err;
468
469 *zapp = NULL;
470
471 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
472 if (err)
473 return (err);
474
475 #ifdef ZFS_DEBUG
476 {
477 dmu_object_info_t doi;
478 dmu_object_info_from_db(db, &doi);
479 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
480 }
481 #endif
482
483 zap = dmu_buf_get_user(db);
484 if (zap == NULL) {
485 zap = mzap_open(os, obj, db);
486 if (zap == NULL) {
487 /*
488 * mzap_open() didn't like what it saw on-disk.
489 * Check for corruption!
490 */
491 dmu_buf_rele(db, NULL);
492 return (SET_ERROR(EIO));
493 }
494 }
495
496 /*
497 * We're checking zap_ismicro without the lock held, in order to
498 * tell what type of lock we want. Once we have some sort of
499 * lock, see if it really is the right type. In practice this
500 * can only be different if it was upgraded from micro to fat,
501 * and micro wanted WRITER but fat only needs READER.
502 */
503 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
504 rw_enter(&zap->zap_rwlock, lt);
505 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
506 /* it was upgraded, now we only need reader */
507 ASSERT(lt == RW_WRITER);
508 ASSERT(RW_READER ==
509 (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
510 rw_downgrade(&zap->zap_rwlock);
511 lt = RW_READER;
512 }
513
514 zap->zap_objset = os;
|