Print this page
NEX-3165 need some dedup improvements
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
re #12611 rb4105 zpool import panic in ddt_zap_count()
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/sys/ddt.h
          +++ new/usr/src/uts/common/fs/zfs/sys/ddt.h
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  23   24   * Copyright (c) 2016 by Delphix. All rights reserved.
  24   25   */
  25   26  
  26   27  #ifndef _SYS_DDT_H
  27   28  #define _SYS_DDT_H
  28   29  
  29   30  #include <sys/sysmacros.h>
  30   31  #include <sys/types.h>
  31   32  #include <sys/fs/zfs.h>
  32   33  #include <sys/zio.h>
↓ open down ↓ 16 lines elided ↑ open up ↑
  49   50  /*
  50   51   * DDT classes, in the desired search order (highest replication level first).
  51   52   */
  52   53  enum ddt_class {
  53   54          DDT_CLASS_DITTO = 0,    /* first in the search order */
  54   55          DDT_CLASS_DUPLICATE,
  55   56          DDT_CLASS_UNIQUE,
  56   57          DDT_CLASSES             /* class count; sizes the per-class arrays */
  57   58  };
  58   59  
       60 +/*
       61 + * Bit flags tracking whether a DDE is loading or already loaded, and
       62 + * which entries were removed from the dedup path to support the dedup ceiling.
       63 + */
       64 +enum dde_state {
       65 +        DDE_LOADING     = (1 << 0),    /* entry is loading */
       66 +        DDE_LOADED      = (1 << 1),    /* entry is already loaded */
       67 +        DDE_NEW         = (1 << 2),    /* NOTE(review): meaning not visible here */
       68 +        DDE_DONT_SYNC   = (1 << 3),    /* presumably: removed from dedup path; confirm */
       69 +};
       70 +
  59   71  #define DDT_TYPE_CURRENT                0
  60   72  
  61   73  #define DDT_COMPRESS_BYTEORDER_MASK     0x80
  62   74  #define DDT_COMPRESS_FUNCTION_MASK      0x7f
  63   75  
  64   76  /*
  65   77   * On-disk ddt entry:  key (name) and physical storage (value).
  66   78   */
  67   79  typedef struct ddt_key {
  68   80          zio_cksum_t     ddk_cksum;      /* 256-bit block checksum */
↓ open down ↓ 35 lines elided ↑ open up ↑
 104  116          DDT_PHYS_TYPES
 105  117  };
 106  118  
 107  119  /*
 108  120   * In-core ddt entry
 109  121   */
 110  122  struct ddt_entry {
 111  123          ddt_key_t       dde_key;
 112  124          ddt_phys_t      dde_phys[DDT_PHYS_TYPES];
 113  125          zio_t           *dde_lead_zio[DDT_PHYS_TYPES];
      126 +        ddt_stat_t      dde_lkstat;
 114  127          struct abd      *dde_repair_abd;
 115  128          enum ddt_type   dde_type;
 116  129          enum ddt_class  dde_class;
 117      -        uint8_t         dde_loading;
 118      -        uint8_t         dde_loaded;
      130 +        uint8_t         dde_state;      /* presumably enum dde_state bits; confirm */
 119  131          kcondvar_t      dde_cv;
      132 +        kmutex_t        dde_lock;       /* presumably used by dde_enter/dde_exit; confirm */
 120  133          avl_node_t      dde_node;       /* AVL tree linkage */
 121  134  };
 122  135  
      136 +#define DDT_HASHSZ              0x100   /* 256; sizes ddt_lock[] and ddt_tree[] */
      137 +#define DDT_HASHFN(csum)        (*((uint8_t *)&(csum).zc_word[0]) & \
      138 +            (DDT_HASHSZ - 1))    /* first in-memory byte of zc_word[0], masked to an index */
      139 +
 123  140  /*
 124  141   * In-core ddt
 125  142   */
 126  143  struct ddt {
 127      -        kmutex_t        ddt_lock;
 128      -        avl_tree_t      ddt_tree;
      144 +        kmutex_t        ddt_lock[DDT_HASHSZ];
      145 +        avl_tree_t      ddt_tree[DDT_HASHSZ];
      146 +        kmutex_t        ddt_repair_lock;
 129  147          avl_tree_t      ddt_repair_tree;
 130  148          enum zio_checksum ddt_checksum;
 131  149          spa_t           *ddt_spa;
 132  150          objset_t        *ddt_os;
 133  151          uint64_t        ddt_stat_object;
 134  152          uint64_t        ddt_object[DDT_TYPES][DDT_CLASSES];
 135  153          ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
 136  154          ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
 137  155          ddt_object_t    ddt_object_stats[DDT_TYPES][DDT_CLASSES];
 138  156          avl_node_t      ddt_node;
↓ open down ↓ 19 lines elided ↑ open up ↑
 158  176          int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
 159  177          int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
 160  178          void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
 161  179              ddt_entry_t *dde);
 162  180          int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
 163  181              dmu_tx_t *tx);
 164  182          int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
 165  183              dmu_tx_t *tx);
 166  184          int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
 167  185              uint64_t *walk);
 168      -        uint64_t (*ddt_op_count)(objset_t *os, uint64_t object);
      186 +        int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
 169  187  } ddt_ops_t;
 170  188  
 171  189  #define DDT_NAMELEN     80
 172  190  
 173  191  extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
 174  192      enum ddt_class class, char *name);
 175  193  extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
 176  194      enum ddt_class class, uint64_t *walk, ddt_entry_t *dde);
 177      -extern uint64_t ddt_object_count(ddt_t *ddt, enum ddt_type type,
 178      -    enum ddt_class class);
      195 +extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
      196 +    enum ddt_class class, uint64_t *count);  /* NOTE(review): looks like *count is an out-param and the int an error code; confirm */
 179  197  extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
 180  198      enum ddt_class class, dmu_object_info_t *);
 181  199  extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
 182  200      enum ddt_class class);
 183  201  
 184  202  extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
 185  203      uint64_t txg);
 186  204  extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
 187  205      const ddt_phys_t *ddp, blkptr_t *bp);
 188  206  
↓ open down ↓ 21 lines elided ↑ open up ↑
 210  228  extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
 211  229  
 212  230  extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
 213  231      ddt_phys_t *ddp_willref);
 214  232  extern int ddt_ditto_copies_present(ddt_entry_t *dde);
 215  233  
 216  234  extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
 217  235  extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
 218  236  
 219  237  extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
 220      -extern void ddt_enter(ddt_t *ddt);
 221      -extern void ddt_exit(ddt_t *ddt);
      238 +extern void ddt_enter(ddt_t *ddt, uint8_t hash);  /* NOTE(review): hash presumably a DDT_HASHFN() bucket index; confirm */
      239 +extern void ddt_exit(ddt_t *ddt, uint8_t hash);
      240 +extern void dde_enter(ddt_entry_t *dde);  /* NOTE(review): presumably acquires dde->dde_lock; confirm */
      241 +extern void dde_exit(ddt_entry_t *dde);
 222  242  extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
 223  243  extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
 224  244  extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
 225  245  
 226  246  extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
 227  247      const blkptr_t *bp);
 228  248  
 229  249  extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
 230  250  extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
 231  251  
 232  252  extern int ddt_entry_compare(const void *x1, const void *x2);
 233  253  
 234  254  extern void ddt_create(spa_t *spa);
 235  255  extern int ddt_load(spa_t *spa);
 236  256  extern void ddt_unload(spa_t *spa);
 237  257  extern void ddt_sync(spa_t *spa, uint64_t txg);
 238  258  extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
 239  259  extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
 240  260      enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx);
      261 +extern void ddt_init(void);
      262 +extern void ddt_fini(void);
 241  263  
 242  264  extern const ddt_ops_t ddt_zap_ops;
 243  265  
 244  266  #ifdef  __cplusplus
 245  267  }
 246  268  #endif
 247  269  
 248  270  #endif  /* _SYS_DDT_H */
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX