Print this page
    
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/sys/zap_leaf.h
          +++ new/usr/src/uts/common/fs/zfs/sys/zap_leaf.h
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  23   24   */
  24   25  
  25   26  #ifndef _SYS_ZAP_LEAF_H
  26   27  #define _SYS_ZAP_LEAF_H
  27   28  
  28   29  #include <sys/zap.h>
  29   30  
  30   31  #ifdef  __cplusplus
  31   32  extern "C" {
  32   33  #endif
  33   34  
  34   35  struct zap;
  35   36  struct zap_name;
  36   37  struct zap_stats;
  37   38  
  38   39  #define ZAP_LEAF_MAGIC 0x2AB1EAF
  39   40  
  40   41  /* chunk size = 24 bytes */
  41   42  #define ZAP_LEAF_CHUNKSIZE 24
  42   43  
  43   44  /*
  44   45   * The amount of space available for chunks is:
  45   46   * block size (1<<l->l_bs) - hash entry size (2) * number of hash
  46   47   * entries - header space (2*chunksize)
  47   48   */
  48   49  #define ZAP_LEAF_NUMCHUNKS(l) \
  49   50          (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
  50   51          ZAP_LEAF_CHUNKSIZE - 2)
  51   52  
  52   53  /*
  53   54   * The amount of space within the chunk available for the array is:
  54   55   * chunk size - space for type (1) - space for next pointer (2)
  55   56   */
  56   57  #define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)
  57   58  
  58   59  #define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
  59   60          (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES)
  60   61  
  61   62  /*
  62   63   * Low water mark:  when there are only this many chunks free, start
  63   64   * growing the ptrtbl.  Ideally, this should be larger than a
  64   65   * "reasonably-sized" entry.  20 chunks is more than enough for the
  65   66   * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
  66   67   * while still being only around 3% for 16k blocks.
  67   68   */
  68   69  #define ZAP_LEAF_LOW_WATER (20)
  69   70  
  70   71  /*
  71   72   * The leaf hash table has block size / 2^5 (i.e. block size / 32)
  72   73   * entries.  That should be more than enough, because the maximum number
  73   74   * of entries is less than block size / CHUNKSIZE (24) / minimum number
  74   75   * of chunks per entry (3).
  75   76   */
  76   77  #define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
  77   78  #define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
  78   79  
  79   80  /*
  80   81   * The chunks start immediately after the hash table.  The end of the
  81   82   * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
  82   83   * chunk_t.
  83   84   */
  84   85  #define ZAP_LEAF_CHUNK(l, idx) \
  85   86          ((zap_leaf_chunk_t *) \
  86   87          (zap_leaf_phys(l)->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
  87   88  #define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)
  88   89  
  89   90  typedef enum zap_chunk_type {
  90   91          ZAP_CHUNK_FREE = 253,           /* tag in lf_type of a free chunk */
  91   92          ZAP_CHUNK_ENTRY = 252,          /* tag in le_type of an entry chunk */
  92   93          ZAP_CHUNK_ARRAY = 251,          /* tag in la_type of an array chunk */
  93   94          ZAP_CHUNK_TYPE_MAX = 250        /* NOTE(review): below the tags above; role not shown here */
  94   95  } zap_chunk_type_t;
  95   96  
  96   97  #define ZLF_ENTRIES_CDSORTED (1<<0)
  97   98  
  98   99  /*
  99  100   * TAKE NOTE:
 100  101   * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
 101  102   */
 102  103  typedef struct zap_leaf_phys {
 103  104          struct zap_leaf_header {
 104  105                  /* Public to ZAP */
 105  106                  uint64_t lh_block_type;         /* ZBT_LEAF */
 106  107                  uint64_t lh_pad1;
 107  108                  uint64_t lh_prefix;             /* hash prefix of this leaf */
 108  109                  uint32_t lh_magic;              /* ZAP_LEAF_MAGIC */
 109  110                  uint16_t lh_nfree;              /* number of free chunks */
 110  111                  uint16_t lh_nentries;           /* number of entries */
 111  112                  uint16_t lh_prefix_len;         /* num bits used to identify this leaf */
 112  113  
 113  114                  /* Private to zap_leaf */
 114  115                  uint16_t lh_freelist;           /* chunk head of free list */
 115  116                  uint8_t lh_flags;               /* ZLF_* flags */
 116  117                  uint8_t lh_pad2[11];            /* pads the header out to 48 bytes */
 117  118          } l_hdr; /* occupies two 24-byte chunks (48 bytes) */
 118  119  
 119  120          /*
 120  121           * The header is followed by a hash table with
 121  122           * ZAP_LEAF_HASH_NUMENTRIES(l) entries.  The hash table is
 122  123           * followed by an array of ZAP_LEAF_NUMCHUNKS(l)
 123  124           * zap_leaf_chunk structures.  These structures are accessed
 124  125           * with the ZAP_LEAF_CHUNK() macro.
 125  126           */
 126  127  
 127  128          uint16_t l_hash[1];     /* first hash slot; the table really has ZAP_LEAF_HASH_NUMENTRIES(l) slots */
 128  129  } zap_leaf_phys_t;
 129  130  
 130  131  typedef union zap_leaf_chunk {
                   /*
                    * One ZAP_LEAF_CHUNKSIZE (24-byte) chunk.  Each variant below is
                    * 24 bytes and starts with a one-byte zap_chunk_type tag
                    * (le_type / la_type / lf_type).
                    */
 131  132          struct zap_leaf_entry {
 132  133                  uint8_t le_type;                /* always ZAP_CHUNK_ENTRY */
 133  134                  uint8_t le_value_intlen;        /* size of value's ints */
 134  135                  uint16_t le_next;               /* next entry in hash chain */
 135  136                  uint16_t le_name_chunk;         /* first chunk of the name */
 136  137                  uint16_t le_name_numints;       /* ints in name (incl null) */
 137  138                  uint16_t le_value_chunk;        /* first chunk of the value */
 138  139                  uint16_t le_value_numints;      /* value length in ints */
 139  140                  uint32_t le_cd;                 /* collision differentiator */
 140  141                  uint64_t le_hash;               /* hash value of the name */
 141  142          } l_entry;
 142  143          struct zap_leaf_array {
 143  144                  uint8_t la_type;                /* always ZAP_CHUNK_ARRAY */
 144  145                  uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; /* payload: ZAP_LEAF_CHUNKSIZE - 3 = 21 bytes */
  
    | 
      ↓ open down ↓ | 
    112 lines elided | 
    
      ↑ open up ↑ | 
  
 145  146                  uint16_t la_next;               /* next blk or CHAIN_END */
 146  147          } l_array;
 147  148          struct zap_leaf_free {
 148  149                  uint8_t lf_type;                /* always ZAP_CHUNK_FREE */
 149  150                  uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES];   /* same size as la_array, so lf_next lines up with la_next */
 150  151                  uint16_t lf_next;       /* next in free list, or CHAIN_END */
 151  152          } l_free;
 152  153  } zap_leaf_chunk_t;
 153  154  
 154  155  typedef struct zap_leaf {
      156 +        dmu_buf_user_t l_dbu;           /* NOTE(review): added by this change; presumably the dbuf-user record for l_dbuf -- confirm in dbuf.h */
 155  157          krwlock_t l_rwlock;             /* reader/writer lock; what it covers is not stated in this header */
 156  158          uint64_t l_blkid;               /* 1<<ZAP_BLOCK_SHIFT byte block off */
 157  159          int l_bs;                       /* block size shift: the block is 1<<l_bs bytes */
 158  160          dmu_buf_t *l_dbuf;              /* dbuf whose db_data is returned by zap_leaf_phys() */
 159  161  } zap_leaf_t;
 160  162  
 161  163  inline zap_leaf_phys_t *
 162  164  zap_leaf_phys(zap_leaf_t *l)
 163  165  {
                   /*
                    * Return the leaf's zap_leaf_phys_t, which is the db_data
                    * pointer of its dbuf (l_dbuf).  No NULL checks are made on
                    * l or l->l_dbuf.
                    *
                    * NOTE(review): plain `inline` (not `static inline`) in a
                    * header needs an external definition under C99 inline
                    * rules -- confirm the build's inline mode.
                    */
 164  166          return (l->l_dbuf->db_data);
 165  167  }
 166  168  
 167  169  typedef struct zap_entry_handle {
 168  170          /* Set by zap_leaf and public to ZAP */
 169  171          uint64_t zeh_num_integers;      /* presumably the value's integer count (cf. le_value_numints) -- confirm */
 170  172          uint64_t zeh_hash;              /* presumably the entry's hash (cf. le_hash) -- confirm */
 171  173          uint32_t zeh_cd;                /* presumably the collision differentiator (cf. le_cd) -- confirm */
 172  174          uint8_t zeh_integer_size;       /* presumably the size of each value integer (cf. le_value_intlen) -- confirm */
 173  175  
 174  176          /* Private to zap_leaf */
 175  177          uint16_t zeh_fakechunk;         /* NOTE(review): usage not visible in this header; see zap_leaf.c */
 176  178          uint16_t *zeh_chunkp;           /* NOTE(review): usage not visible in this header; see zap_leaf.c */
 177  179          zap_leaf_t *zeh_leaf;           /* presumably the leaf that holds the entry -- confirm */
 178  180  } zap_entry_handle_t;
 179  181  
 180  182  /*
 181  183   * Return a handle to the named entry, or ENOENT if not found.  The hash
 182  184   * value must equal zap_hash(name).
 183  185   */
 184  186  extern int zap_leaf_lookup(zap_leaf_t *l,
 185  187      struct zap_name *zn, zap_entry_handle_t *zeh);
 186  188  
 187  189  /*
 188  190   * Return a handle to the entry with this hash+cd, or the entry with the
 189  191   * next closest hash+cd.
 190  192   */
 191  193  extern int zap_leaf_lookup_closest(zap_leaf_t *l,
 192  194      uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh);
 193  195  
 194  196  /*
 195  197   * Read the first num_integers in the attribute.  Integer size
 196  198   * conversion will be done without sign extension.  Return EINVAL if
 197  199   * integer_size is too small.  Return EOVERFLOW if there are more than
 198  200   * num_integers in the attribute.
 199  201   */
 200  202  extern int zap_entry_read(const zap_entry_handle_t *zeh,
 201  203      uint8_t integer_size, uint64_t num_integers, void *buf);
 202  204  
 203  205  extern int zap_entry_read_name(struct zap *zap, const zap_entry_handle_t *zeh,
 204  206      uint16_t buflen, char *buf);
 205  207  
 206  208  /*
 207  209   * Replace the value of an existing entry.
 208  210   *
 209  211   * May fail if it runs out of space (ENOSPC).
 210  212   */
 211  213  extern int zap_entry_update(zap_entry_handle_t *zeh,
 212  214      uint8_t integer_size, uint64_t num_integers, const void *buf);
 213  215  
 214  216  /*
 215  217   * Remove an entry.
 216  218   */
 217  219  extern void zap_entry_remove(zap_entry_handle_t *zeh);
 218  220  
 219  221  /*
 220  222   * Create an entry. An equal entry must not exist, and this entry must
 221  223   * belong in this leaf (according to its hash value).  Fills in the
 222  224   * entry handle on success.  Returns 0 on success or ENOSPC on failure.
 223  225   */
 224  226  extern int zap_entry_create(zap_leaf_t *l, struct zap_name *zn, uint32_t cd,
 225  227      uint8_t integer_size, uint64_t num_integers, const void *buf,
 226  228      zap_entry_handle_t *zeh);
 227  229  
 228  230  /* Determine whether there is another entry with the same normalized form. */
 229  231  extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
 230  232      struct zap_name *zn, const char *name, struct zap *zap);
 231  233  
 232  234  /*
 233  235   * Other stuff.
 234  236   */
 235  237  
 236  238  extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
 237  239  extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
 238  240  extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
 239  241  extern void zap_leaf_stats(struct zap *zap, zap_leaf_t *l,
 240  242      struct zap_stats *zs);
 241  243  
 242  244  #ifdef  __cplusplus
 243  245  }
 244  246  #endif
 245  247  
 246  248  #endif /* _SYS_ZAP_LEAF_H */
  
    | 
      ↓ open down ↓ | 
    82 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX