Print this page
    
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/sys/zap_impl.h
          +++ new/usr/src/uts/common/fs/zfs/sys/zap_impl.h
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2013 by Delphix. All rights reserved.
       24 + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  24   25   */
  25   26  
  26   27  #ifndef _SYS_ZAP_IMPL_H
  27   28  #define _SYS_ZAP_IMPL_H
  28   29  
  29   30  #include <sys/zap.h>
  30   31  #include <sys/zfs_context.h>
  31   32  #include <sys/avl.h>
  32   33  
  33   34  #ifdef  __cplusplus
  34   35  extern "C" {
  35   36  #endif
  36   37  
  37   38  extern int fzap_default_block_shift;
  38   39  
  39   40  #define ZAP_MAGIC 0x2F52AB2ABULL
  40   41  
  41   42  #define FZAP_BLOCK_SHIFT(zap)   ((zap)->zap_f.zap_block_shift)
  42   43  
  43   44  #define MZAP_ENT_LEN            64
  44   45  #define MZAP_NAME_LEN           (MZAP_ENT_LEN - 8 - 4 - 2)
  45   46  #define MZAP_MAX_BLKSZ          SPA_OLD_MAXBLOCKSIZE
  46   47  
  47   48  #define ZAP_NEED_CD             (-1U)
  48   49  
  49   50  typedef struct mzap_ent_phys {
  50   51          uint64_t mze_value;
  51   52          uint32_t mze_cd;
  52   53          uint16_t mze_pad;       /* in case we want to chain them someday */
  53   54          char mze_name[MZAP_NAME_LEN]; /* rounds entry out to MZAP_ENT_LEN */
  54   55  } mzap_ent_phys_t;
  55   56  
  56   57  typedef struct mzap_phys {
  57   58          uint64_t mz_block_type; /* ZBT_MICRO */
  58   59          uint64_t mz_salt;
  59   60          uint64_t mz_normflags;
  60   61          uint64_t mz_pad[5];     /* brings the header to 8 64-bit words */
  61   62          mzap_ent_phys_t mz_chunk[1];
  62   63          /* actually variable size depending on block size */
  63   64  } mzap_phys_t;
  64   65  
  65   66  typedef struct mzap_ent {
  66   67          avl_node_t mze_node;
  67   68          int mze_chunkid;        /* index into mz_chunk[]; see MZE_PHYS() */
  68   69          uint64_t mze_hash;
  69   70          uint32_t mze_cd; /* copy of MZE_PHYS(zap, mze)->mze_cd */
  70   71  } mzap_ent_t;
  71   72  
  72   73  #define MZE_PHYS(zap, mze) \
  73   74          (&zap_m_phys(zap)->mz_chunk[(mze)->mze_chunkid])
  74   75  
  75   76  /*
  76   77   * The (fat) zap is stored in one object. It is an array of
  77   78   * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
  78   79   *
  79   80   * ptrtbl fits in first block:
  80   81   *      [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
  81   82   *
  82   83   * ptrtbl too big for first block:
  83   84   *      [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
  84   85   *
  85   86   */
  86   87  
  87   88  struct dmu_buf;
  88   89  struct zap_leaf;
  89   90  
  90   91  #define ZBT_LEAF                ((1ULL << 63) + 0)
  91   92  #define ZBT_HEADER              ((1ULL << 63) + 1)
  92   93  #define ZBT_MICRO               ((1ULL << 63) + 3)
  93   94  /* any other values are ptrtbl blocks */
  94   95  
  95   96  /*
  96   97   * the embedded pointer table takes up half a block:
  97   98   * block size / entry size (2^3) / 2
  98   99   */
  99  100  #define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
 100  101  
 101  102  /*
 102  103   * The embedded pointer table starts half-way through the block.  Since
 103  104   * the pointer table itself is half the block, it starts at (64-bit)
 104  105   * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
 105  106   */
 106  107  #define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
 107  108          ((uint64_t *)zap_f_phys(zap)) \
 108  109          [(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
 109  110  
 110  111  /*
 111  112   * TAKE NOTE:
 112  113   * If zap_phys_t is modified, zap_byteswap() must be modified.
 113  114   */
 114  115  typedef struct zap_phys {
 115  116          uint64_t zap_block_type;        /* ZBT_HEADER */
 116  117          uint64_t zap_magic;             /* ZAP_MAGIC */
 117  118  
 118  119          struct zap_table_phys {
 119  120                  uint64_t zt_blk;        /* starting block number */
 120  121                  uint64_t zt_numblks;    /* number of blocks */
 121  122                  uint64_t zt_shift;      /* bits to index it */
 122  123                  uint64_t zt_nextblk;    /* next (larger) copy start block */
 123  124                  uint64_t zt_blks_copied; /* number source blocks copied */
 124  125          } zap_ptrtbl;
 125  126  
 126  127          uint64_t zap_freeblk;           /* the next free block */
 127  128          uint64_t zap_num_leafs;         /* number of leafs */
 128  129          uint64_t zap_num_entries;       /* number of entries */
 129  130          uint64_t zap_salt;              /* salt to stir into hash function */
 130  131          uint64_t zap_normflags;         /* flags for u8_textprep_str() */
 131  132          uint64_t zap_flags;             /* zap_flags_t */
 132  133          /*
  
    | 
      ↓ open down ↓ | 
    99 lines elided | 
    
      ↑ open up ↑ | 
  
 133  134           * This structure is followed by padding, and then the embedded
 134  135           * pointer table.  The embedded pointer table takes up the second
 135  136           * half of the block.  It is accessed using the
 136  137           * ZAP_EMBEDDED_PTRTBL_ENT() macro.
 137  138           */
 138  139  } zap_phys_t;
 139  140  
 140  141  typedef struct zap_table_phys zap_table_phys_t;
 141  142  
 142  143  typedef struct zap {
      144 +        dmu_buf_user_t zap_dbu;
 143  145          objset_t *zap_objset;
 144  146          uint64_t zap_object;
 145  147          struct dmu_buf *zap_dbuf; /* db_data is read via zap_[fm]_phys() */
 146  148          krwlock_t zap_rwlock;
 147  149          boolean_t zap_ismicro; /* NOTE(review): presumably picks the zap_u arm */
 148  150          int zap_normflags;
 149  151          uint64_t zap_salt;
 150  152          union {
 151  153                  struct {
 152  154                          /*
 153  155                           * zap_num_entries_mtx protects
 154  156                           * zap_num_entries
 155  157                           */
 156  158                          kmutex_t zap_num_entries_mtx;
 157  159                          int zap_block_shift;
 158  160                  } zap_fat;
 159  161                  struct {
 160  162                          int16_t zap_num_entries;
 161  163                          int16_t zap_num_chunks;
 162  164                          int16_t zap_alloc_next;
 163  165                          avl_tree_t zap_avl;
 164  166                  } zap_micro;
 165  167          } zap_u; /* arms are reached through the zap_f / zap_m macros */
 166  168  } zap_t;
 167  169  
 168  170  inline zap_phys_t *
 169  171  zap_f_phys(zap_t *zap) /* view the zap's buffer data as a zap_phys_t */
 170  172  {
 171  173          return (zap->zap_dbuf->db_data);
 172  174  }
 173  175  
 174  176  inline mzap_phys_t *
 175  177  zap_m_phys(zap_t *zap) /* view the zap's buffer data as an mzap_phys_t */
 176  178  {
 177  179          return (zap->zap_dbuf->db_data);
 178  180  }
 179  181  
 180  182  typedef struct zap_name {
 181  183          zap_t *zn_zap;
 182  184          int zn_key_intlen;
 183  185          const void *zn_key_orig;
 184  186          int zn_key_orig_numints;
 185  187          const void *zn_key_norm;
 186  188          int zn_key_norm_numints;
 187  189          uint64_t zn_hash;
 188  190          matchtype_t zn_matchtype;
  
    | 
      ↓ open down ↓ | 
    36 lines elided | 
    
      ↑ open up ↑ | 
  
 189  191          char zn_normbuf[ZAP_MAXNAMELEN];
 190  192  } zap_name_t;
 191  193  
 192  194  #define zap_f   zap_u.zap_fat
 193  195  #define zap_m   zap_u.zap_micro
 194  196  
 195  197  boolean_t zap_match(zap_name_t *zn, const char *matchname);
 196  198  int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
 197  199      krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
 198  200  void zap_unlockdir(zap_t *zap);
 199      -void zap_evict(dmu_buf_t *db, void *vmzap);
      201 +void zap_evict(void *dbu);
 200  202  zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
 201  203  void zap_name_free(zap_name_t *zn);
 202  204  int zap_hashbits(zap_t *zap);
 203  205  uint32_t zap_maxcd(zap_t *zap);
 204  206  uint64_t zap_getflags(zap_t *zap);
 205  207  
 206  208  #define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n)))) /* n == 0 guard: never shifts by 64 */
 207  209  
 208  210  void fzap_byteswap(void *buf, size_t size);
 209  211  int fzap_count(zap_t *zap, uint64_t *count);
 210  212  int fzap_lookup(zap_name_t *zn,
 211  213      uint64_t integer_size, uint64_t num_integers, void *buf,
 212  214      char *realname, int rn_len, boolean_t *normalization_conflictp);
 213  215  void fzap_prefetch(zap_name_t *zn);
 214  216  int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
 215  217      uint64_t *tooverwrite);
 216  218  int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
 217  219      const void *val, dmu_tx_t *tx);
 218  220  int fzap_update(zap_name_t *zn,
 219  221      int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
 220  222  int fzap_length(zap_name_t *zn,
 221  223      uint64_t *integer_size, uint64_t *num_integers);
 222  224  int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
 223  225  int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
 224  226  void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
 225  227  void zap_put_leaf(struct zap_leaf *l);
 226  228  
 227  229  int fzap_add_cd(zap_name_t *zn,
 228  230      uint64_t integer_size, uint64_t num_integers,
 229  231      const void *val, uint32_t cd, dmu_tx_t *tx);
 230  232  void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);
 231  233  
 232  234  #ifdef  __cplusplus
 233  235  }
 234  236  #endif
 235  237  
 236  238  #endif /* _SYS_ZAP_IMPL_H */
  
    | 
      ↓ open down ↓ | 
    27 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX