350         kmem_free(mze, sizeof (mzap_ent_t));
 351 }
 352 
 353 static void
 354 mze_destroy(zap_t *zap)
 355 {
 356         mzap_ent_t *mze;
 357         void *avlcookie = NULL;
 358 
 359         while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
 360                 kmem_free(mze, sizeof (mzap_ent_t));
 361         avl_destroy(&zap->zap_m.zap_avl);
 362 }
 363 
     /*
      * Build the in-core zap_t for the ZAP object whose first block is
      * held in db, and register it as that dbuf's user.
      *
      * os, obj - recorded in the new zap_t as zap_objset / zap_object.
      * db      - buffer whose db_data is inspected and which the zap_t
      *           is attached to.
      *
      * Returns the newly built zap_t with its zap_rwlock released, or,
      * when dmu_buf_set_user() reports a non-NULL "winner", that zap_t
      * instead (the one built here is torn down and freed).
      */
 364 static zap_t *
 365 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
 366 {
 367         zap_t *winner;
 368         zap_t *zap;
 369         int i;
 370 
 371         ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
 372 
             /*
              * Allocate a zeroed zap_t and hold its rwlock as writer while
              * the structure is being filled in.
              */
 373         zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
 374         rw_init(&zap->zap_rwlock, 0, 0, 0);
 375         rw_enter(&zap->zap_rwlock, RW_WRITER);
 376         zap->zap_objset = os;
 377         zap->zap_object = obj;
 378         zap->zap_dbuf = db;
 379 
             /*
              * The first 64-bit word of the block selects the flavor:
              * anything other than ZBT_MICRO is set up as a fat ZAP.
              */
 380         if (*(uint64_t *)db->db_data != ZBT_MICRO) {
 381                 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
 382                 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
 383         } else {
 384                 zap->zap_ismicro = TRUE;
 385         }
 386 
 387         /*
 388          * Make sure that zap_ismicro is set before we let others see
 389          * it, because zap_lockdir() checks zap_ismicro without the lock
 390          * held.
 391          */
 392         dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
 393         winner = dmu_buf_set_user(db, &zap->zap_dbu);
 394 
             /*
              * A non-NULL return means our zap_t is not the one to use:
              * undo the lock/mutex setup, free it, and return the winner.
              * NOTE(review): presumably the winner is a zap_t that another
              * opener attached first -- confirm against dmu_buf_set_user().
              */
 395         if (winner != NULL) {
 396                 rw_exit(&zap->zap_rwlock);
 397                 rw_destroy(&zap->zap_rwlock);
 398                 if (!zap->zap_ismicro)
 399                         mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 400                 kmem_free(zap, sizeof (zap_t));
 401                 return (winner);
 402         }
 403 
 404         if (zap->zap_ismicro) {
 405                 zap->zap_salt = zap_m_phys(zap)->mz_salt;
 406                 zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
 407                 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
 408                 avl_create(&zap->zap_m.zap_avl, mze_compare,
 409                     sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
 410 
                     /*
                      * Walk every chunk; one whose name is non-empty is
                      * counted and handed to mze_insert() along with the
                      * hash taken from a temporary zap_name_t.
                      */
 411                 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 412                         mzap_ent_phys_t *mze =
 413                             &zap_m_phys(zap)->mz_chunk[i];
 414                         if (mze->mze_name[0]) {
 415                                 zap_name_t *zn;
 416 
 417                                 zap->zap_m.zap_num_entries++;
 418                                 zn = zap_name_alloc(zap, mze->mze_name,
 419                                     MT_EXACT);
 420                                 mze_insert(zap, i, zn->zn_hash);
 421                                 zap_name_free(zn);
 422                         }
                     /*
                      * NOTE(review): listing lines 423-428 are not present
                      * in this view; the text resumes mid-statement at 429.
                      */
 
 429                     2*ZAP_LEAF_CHUNKSIZE);
 430 
 431                 /*
 432                  * The embedded pointer table should not overlap the
 433                  * other members.
 434                  */
 435                 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
 436                     &zap_f_phys(zap)->zap_salt);
 437 
 438                 /*
 439                  * The embedded pointer table should end at the end of
 440                  * the block
 441                  */
 442                 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
 443                     1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
 444                     (uintptr_t)zap_f_phys(zap), ==,
 445                     zap->zap_dbuf->db_size);
 446         }
             /* Setup is complete: drop the writer lock taken above. */
 447         rw_exit(&zap->zap_rwlock);
 448         return (zap);
 449 }
 450 
 451 int
 452 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
 453     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
 454 {
 455         zap_t *zap;
 456         dmu_buf_t *db;
 457         krw_t lt;
 458         int err;
 459 
 460         *zapp = NULL;
 461 
 462         err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
 463         if (err)
 464                 return (err);
 465 
 466 #ifdef ZFS_DEBUG
 467         {
 468                 dmu_object_info_t doi;
 469                 dmu_object_info_from_db(db, &doi);
 470                 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
 471         }
 472 #endif
 473 
 474         zap = dmu_buf_get_user(db);
 475         if (zap == NULL)
 476                 zap = mzap_open(os, obj, db);
 477 
 478         /*
 479          * We're checking zap_ismicro without the lock held, in order to
 480          * tell what type of lock we want.  Once we have some sort of
 481          * lock, see if it really is the right type.  In practice this
 482          * can only be different if it was upgraded from micro to fat,
 483          * and micro wanted WRITER but fat only needs READER.
 484          */
 485         lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
 486         rw_enter(&zap->zap_rwlock, lt);
 487         if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
 488                 /* it was upgraded, now we only need reader */
 489                 ASSERT(lt == RW_WRITER);
 490                 ASSERT(RW_READER ==
 491                     (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
 492                 rw_downgrade(&zap->zap_rwlock);
 493                 lt = RW_READER;
 494         }
 495 
 496         zap->zap_objset = os;
 
 | 
 
 
 350         kmem_free(mze, sizeof (mzap_ent_t));
 351 }
 352 
 353 static void
 354 mze_destroy(zap_t *zap)
 355 {
 356         mzap_ent_t *mze;
 357         void *avlcookie = NULL;
 358 
 359         while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
 360                 kmem_free(mze, sizeof (mzap_ent_t));
 361         avl_destroy(&zap->zap_m.zap_avl);
 362 }
 363 
     /*
      * Build the in-core zap_t for the ZAP object whose first block is
      * held in db, and register it as that dbuf's user.
      *
      * os, obj - recorded in the new zap_t as zap_objset / zap_object.
      * db      - buffer whose db_data is inspected and which the zap_t
      *           is attached to.
      *
      * Returns the newly built zap_t with its zap_rwlock released; or the
      * non-NULL "winner" reported by dmu_buf_set_user(); or NULL when the
      * block is not a microzap and its first two words are not
      * ZBT_HEADER / ZAP_MAGIC.  In the latter two cases the zap_t built
      * here is torn down and freed.
      */
 364 static zap_t *
 365 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
 366 {
 367         zap_t *winner;
 368         zap_t *zap;
 369         int i;
             /* Words 0 and 1 of the block: block type and magic. */
 370         uint64_t *zap_hdr = (uint64_t *)db->db_data;
 371         uint64_t zap_block_type = zap_hdr[0];
 372         uint64_t zap_magic = zap_hdr[1];
 373 
 374         ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
 375 
             /*
              * Allocate a zeroed zap_t and hold its rwlock as writer while
              * the structure is being filled in.
              */
 376         zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
 377         rw_init(&zap->zap_rwlock, 0, 0, 0);
 378         rw_enter(&zap->zap_rwlock, RW_WRITER);
 379         zap->zap_objset = os;
 380         zap->zap_object = obj;
 381         zap->zap_dbuf = db;
 382 
             /*
              * Anything other than ZBT_MICRO is set up as a fat ZAP, but
              * only accepted if it carries the ZBT_HEADER type and the
              * ZAP_MAGIC value.  The mutex is initialized before the check
              * so the shared teardown below can destroy it unconditionally
              * for the non-micro case.
              */
 383         if (zap_block_type != ZBT_MICRO) {
 384                 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
 385                 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
 386                 if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
 387                         winner = NULL;  /* No actual winner here... */
 388                         goto handle_winner;
 389                 }
 390         } else {
 391                 zap->zap_ismicro = TRUE;
 392         }
 393 
 394         /*
 395          * Make sure that zap_ismicro is set before we let others see
 396          * it, because zap_lockdir() checks zap_ismicro without the lock
 397          * held.
 398          */
 399         dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
 400         winner = dmu_buf_set_user(db, &zap->zap_dbu);
 401 
             /*
              * A non-NULL return means our zap_t is not the one to use.
              * NOTE(review): presumably the winner is a zap_t that another
              * opener attached first -- confirm against dmu_buf_set_user().
              */
 402         if (winner != NULL)
 403                 goto handle_winner;
 404 
 405         if (zap->zap_ismicro) {
 406                 zap->zap_salt = zap_m_phys(zap)->mz_salt;
 407                 zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
 408                 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
 409                 avl_create(&zap->zap_m.zap_avl, mze_compare,
 410                     sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
 411 
                     /*
                      * Walk every chunk; one whose name is non-empty is
                      * counted and handed to mze_insert() along with the
                      * hash taken from a temporary zap_name_t.
                      */
 412                 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 413                         mzap_ent_phys_t *mze =
 414                             &zap_m_phys(zap)->mz_chunk[i];
 415                         if (mze->mze_name[0]) {
 416                                 zap_name_t *zn;
 417 
 418                                 zap->zap_m.zap_num_entries++;
 419                                 zn = zap_name_alloc(zap, mze->mze_name,
 420                                     MT_EXACT);
 421                                 mze_insert(zap, i, zn->zn_hash);
 422                                 zap_name_free(zn);
 423                         }
                     /*
                      * NOTE(review): listing lines 424-429 are not present
                      * in this view; the text resumes mid-statement at 430.
                      */
 
 430                     2*ZAP_LEAF_CHUNKSIZE);
 431 
 432                 /*
 433                  * The embedded pointer table should not overlap the
 434                  * other members.
 435                  */
 436                 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
 437                     &zap_f_phys(zap)->zap_salt);
 438 
 439                 /*
 440                  * The embedded pointer table should end at the end of
 441                  * the block
 442                  */
 443                 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
 444                     1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
 445                     (uintptr_t)zap_f_phys(zap), ==,
 446                     zap->zap_dbuf->db_size);
 447         }
             /* Setup is complete: drop the writer lock taken above. */
 448         rw_exit(&zap->zap_rwlock);
 449         return (zap);
 450 
             /*
              * Shared teardown for a zap_t that will not be used.  Reached
              * with winner != NULL after dmu_buf_set_user(), or with
              * winner == NULL when the header check above failed; either
              * way the caller gets back whatever winner holds.
              */
 451 handle_winner:
 452         rw_exit(&zap->zap_rwlock);
 453         rw_destroy(&zap->zap_rwlock);
 454         if (!zap->zap_ismicro)
 455                 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 456         kmem_free(zap, sizeof (zap_t));
 457         return (winner);
 458 }
 459 
 460 int
 461 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
 462     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
 463 {
 464         zap_t *zap;
 465         dmu_buf_t *db;
 466         krw_t lt;
 467         int err;
 468 
 469         *zapp = NULL;
 470 
 471         err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
 472         if (err)
 473                 return (err);
 474 
 475 #ifdef ZFS_DEBUG
 476         {
 477                 dmu_object_info_t doi;
 478                 dmu_object_info_from_db(db, &doi);
 479                 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
 480         }
 481 #endif
 482 
 483         zap = dmu_buf_get_user(db);
 484         if (zap == NULL) {
 485                 zap = mzap_open(os, obj, db);
 486                 if (zap == NULL) {
 487                         /*
 488                          * mzap_open() didn't like what it saw on-disk.
 489                          * Check for corruption!
 490                          */
 491                         dmu_buf_rele(db, NULL);
 492                         return (SET_ERROR(EIO));
 493                 }
 494         }
 495 
 496         /*
 497          * We're checking zap_ismicro without the lock held, in order to
 498          * tell what type of lock we want.  Once we have some sort of
 499          * lock, see if it really is the right type.  In practice this
 500          * can only be different if it was upgraded from micro to fat,
 501          * and micro wanted WRITER but fat only needs READER.
 502          */
 503         lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
 504         rw_enter(&zap->zap_rwlock, lt);
 505         if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
 506                 /* it was upgraded, now we only need reader */
 507                 ASSERT(lt == RW_WRITER);
 508                 ASSERT(RW_READER ==
 509                     (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
 510                 rw_downgrade(&zap->zap_rwlock);
 511                 lt = RW_READER;
 512         }
 513 
 514         zap->zap_objset = os;
 
 |