Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.

  24  */
  25 
  26 #include <sys/zio.h>
  27 #include <sys/spa.h>
  28 #include <sys/dmu.h>
  29 #include <sys/zfs_context.h>
  30 #include <sys/zap.h>
  31 #include <sys/refcount.h>
  32 #include <sys/zap_impl.h>
  33 #include <sys/zap_leaf.h>
  34 #include <sys/avl.h>
  35 #include <sys/arc.h>
  36 #include <sys/dmu_objset.h>
  37 
  38 #ifdef _KERNEL
  39 #include <sys/sunddi.h>
  40 #endif
  41 
  42 extern inline mzap_phys_t *zap_m_phys(zap_t *zap);
  43 


 370 
 371         zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
 372         rw_init(&zap->zap_rwlock, 0, 0, 0);
 373         rw_enter(&zap->zap_rwlock, RW_WRITER);
 374         zap->zap_objset = os;
 375         zap->zap_object = obj;
 376         zap->zap_dbuf = db;
 377 
 378         if (*(uint64_t *)db->db_data != ZBT_MICRO) {
 379                 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
 380                 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
 381         } else {
 382                 zap->zap_ismicro = TRUE;
 383         }
 384 
 385         /*
 386          * Make sure that zap_ismicro is set before we let others see
 387          * it, because zap_lockdir() checks zap_ismicro without the lock
 388          * held.
 389          */
 390         winner = dmu_buf_set_user(db, zap, zap_evict);

 391 
 392         if (winner != NULL) {
 393                 rw_exit(&zap->zap_rwlock);
 394                 rw_destroy(&zap->zap_rwlock);
 395                 if (!zap->zap_ismicro)
 396                         mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 397                 kmem_free(zap, sizeof (zap_t));
 398                 return (winner);
 399         }
 400 
 401         if (zap->zap_ismicro) {
 402                 zap->zap_salt = zap_m_phys(zap)->mz_salt;
 403                 zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
 404                 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
 405                 avl_create(&zap->zap_m.zap_avl, mze_compare,
 406                     sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
 407 
 408                 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 409                         mzap_ent_phys_t *mze =
 410                             &zap_m_phys(zap)->mz_chunk[i];


 662 
 663         VERIFY(dmu_object_set_blocksize(os, obj,
 664             1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
 665 
 666         mzap_create_impl(os, obj, normflags, flags, tx);
 667         return (obj);
 668 }
 669 
  670 int
  671 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
  672 {
  673         /*
  674          * Destroy a ZAP object by freeing its DMU object.  dmu_object_free()
  675          * frees the object number and data; freeing the data runs our evict
  676          * callback (likely zap_evict()), destroying zap_leaf_t's and zap_t.
  677          */
  678 
  679         return (dmu_object_free(os, zapobj, tx));
  680 }
 681 
  682 _NOTE(ARGSUSED(0))
  683 void
  684 zap_evict(dmu_buf_t *db, void *vzap)
  685 {
              /*
               * Tear down the in-core zap_t handed in as vzap: destroy its
               * rwlock, release the state specific to the micro or non-micro
               * variant, then free the zap_t itself.  db is not referenced in
               * this body.
               */
  686         zap_t *zap = vzap;
  687 
  688         rw_destroy(&zap->zap_rwlock);
  689 
              /*
               * When zap_ismicro is set, mze_destroy() is called on the zap
               * (presumably releasing the mzap entry AVL tree -- confirm).
               * Otherwise only zap_f.zap_num_entries_mtx is destroyed.
               */
  690         if (zap->zap_ismicro)
  691                 mze_destroy(zap);
  692         else
  693                 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
  694 
  695         kmem_free(zap, sizeof (zap_t));
  696 }
 697 
 698 int
 699 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
 700 {
 701         zap_t *zap;
 702         int err;
 703 
 704         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 705         if (err)
 706                 return (err);




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  24  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  25  */
  26 
  27 #include <sys/zio.h>
  28 #include <sys/spa.h>
  29 #include <sys/dmu.h>
  30 #include <sys/zfs_context.h>
  31 #include <sys/zap.h>
  32 #include <sys/refcount.h>
  33 #include <sys/zap_impl.h>
  34 #include <sys/zap_leaf.h>
  35 #include <sys/avl.h>
  36 #include <sys/arc.h>
  37 #include <sys/dmu_objset.h>
  38 
  39 #ifdef _KERNEL
  40 #include <sys/sunddi.h>
  41 #endif
  42 
  43 extern inline mzap_phys_t *zap_m_phys(zap_t *zap);
  44 


 371 
 372         zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
 373         rw_init(&zap->zap_rwlock, 0, 0, 0);
 374         rw_enter(&zap->zap_rwlock, RW_WRITER);
 375         zap->zap_objset = os;
 376         zap->zap_object = obj;
 377         zap->zap_dbuf = db;
 378 
 379         if (*(uint64_t *)db->db_data != ZBT_MICRO) {
 380                 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
 381                 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
 382         } else {
 383                 zap->zap_ismicro = TRUE;
 384         }
 385 
 386         /*
 387          * Make sure that zap_ismicro is set before we let others see
 388          * it, because zap_lockdir() checks zap_ismicro without the lock
 389          * held.
 390          */
 391         dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
 392         winner = dmu_buf_set_user(db, &zap->zap_dbu);
 393 
 394         if (winner != NULL) {
 395                 rw_exit(&zap->zap_rwlock);
 396                 rw_destroy(&zap->zap_rwlock);
 397                 if (!zap->zap_ismicro)
 398                         mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 399                 kmem_free(zap, sizeof (zap_t));
 400                 return (winner);
 401         }
 402 
 403         if (zap->zap_ismicro) {
 404                 zap->zap_salt = zap_m_phys(zap)->mz_salt;
 405                 zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
 406                 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
 407                 avl_create(&zap->zap_m.zap_avl, mze_compare,
 408                     sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
 409 
 410                 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 411                         mzap_ent_phys_t *mze =
 412                             &zap_m_phys(zap)->mz_chunk[i];


 664 
 665         VERIFY(dmu_object_set_blocksize(os, obj,
 666             1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
 667 
 668         mzap_create_impl(os, obj, normflags, flags, tx);
 669         return (obj);
 670 }
 671 
  672 int
  673 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
  674 {
  675         /*
  676          * Destroy a ZAP object by freeing its DMU object.  dmu_object_free()
  677          * frees the object number and data; freeing the data runs our evict
  678          * callback (likely zap_evict()), destroying zap_leaf_t's and zap_t.
  679          */
  680 
  681         return (dmu_object_free(os, zapobj, tx));
  682 }
 683 

  684 void
  685 zap_evict(void *dbu)
  686 {
              /*
               * Tear down the in-core zap_t identified by dbu: destroy its
               * rwlock, release the state specific to the micro or non-micro
               * variant, then free the zap_t itself.
               *
               * NOTE(review): dbu is used directly as the zap_t pointer, while
               * the pointer registered with dmu_buf_init_user() is
               * &zap->zap_dbu.  This presumably relies on zap_dbu being the
               * first member of zap_t -- confirm against the zap_t definition.
               */
  687         zap_t *zap = dbu;
  688 
  689         rw_destroy(&zap->zap_rwlock);
  690 
              /*
               * When zap_ismicro is set, mze_destroy() is called on the zap
               * (presumably releasing the mzap entry AVL tree -- confirm).
               * Otherwise only zap_f.zap_num_entries_mtx is destroyed.
               */
  691         if (zap->zap_ismicro)
  692                 mze_destroy(zap);
  693         else
  694                 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
  695 
  696         kmem_free(zap, sizeof (zap_t));
  697 }
 698 
 699 int
 700 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
 701 {
 702         zap_t *zap;
 703         int err;
 704 
 705         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 706         if (err)
 707                 return (err);