Print this page
    
2619 asynchronous destruction of ZFS file systems
2747 SPA versioning with zfs feature flags
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <gwilson@delphix.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/ddt.c
          +++ new/usr/src/uts/common/fs/zfs/ddt.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright (c) 2012 by Delphix. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <sys/zfs_context.h>
  27   28  #include <sys/spa.h>
  28   29  #include <sys/spa_impl.h>
  29   30  #include <sys/zio.h>
  30   31  #include <sys/ddt.h>
  31   32  #include <sys/zap.h>
  32   33  #include <sys/dmu_tx.h>
  33   34  #include <sys/arc.h>
  34   35  #include <sys/dsl_pool.h>
  35   36  #include <sys/zio_checksum.h>
  36   37  #include <sys/zio_compress.h>
  37   38  #include <sys/dsl_scan.h>
  38   39  
  39   40  /*
  40   41   * Enable/disable prefetching of dedup-ed blocks which are going to be freed.
  41   42   */
  42   43  int zfs_dedup_prefetch = 1;
  43   44  
  44   45  static const ddt_ops_t *ddt_ops[DDT_TYPES] = {
  45   46          &ddt_zap_ops,
  46   47  };
  47   48  
  48   49  static const char *ddt_class_name[DDT_CLASSES] = {
  49   50          "ditto",
  50   51          "duplicate",
  51   52          "unique",
  52   53  };
  53   54  
  54   55  static void
  55   56  ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
  56   57      dmu_tx_t *tx)
  57   58  {
  58   59          spa_t *spa = ddt->ddt_spa;
  59   60          objset_t *os = ddt->ddt_os;
  60   61          uint64_t *objectp = &ddt->ddt_object[type][class];
  61   62          boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup;
  62   63          char name[DDT_NAMELEN];
  63   64  
  64   65          ddt_object_name(ddt, type, class, name);
  65   66  
  66   67          ASSERT(*objectp == 0);
  67   68          VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0);
  68   69          ASSERT(*objectp != 0);
  69   70  
  70   71          VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name,
  71   72              sizeof (uint64_t), 1, objectp, tx) == 0);
  72   73  
  73   74          VERIFY(zap_add(os, spa->spa_ddt_stat_object, name,
  74   75              sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
  75   76              &ddt->ddt_histogram[type][class], tx) == 0);
  76   77  }
  77   78  
  78   79  static void
  79   80  ddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
  80   81      dmu_tx_t *tx)
  81   82  {
  82   83          spa_t *spa = ddt->ddt_spa;
  83   84          objset_t *os = ddt->ddt_os;
  84   85          uint64_t *objectp = &ddt->ddt_object[type][class];
  85   86          char name[DDT_NAMELEN];
  86   87  
  87   88          ddt_object_name(ddt, type, class, name);
  88   89  
  89   90          ASSERT(*objectp != 0);
  90   91          ASSERT(ddt_object_count(ddt, type, class) == 0);
  91   92          ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class]));
  92   93          VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0);
  93   94          VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0);
  94   95          VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0);
  95   96          bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t));
  96   97  
  97   98          *objectp = 0;
  98   99  }
  99  100  
 100  101  static int
 101  102  ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
 102  103  {
 103  104          ddt_object_t *ddo = &ddt->ddt_object_stats[type][class];
 104  105          dmu_object_info_t doi;
 105  106          char name[DDT_NAMELEN];
 106  107          int error;
 107  108  
 108  109          ddt_object_name(ddt, type, class, name);
 109  110  
 110  111          error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name,
 111  112              sizeof (uint64_t), 1, &ddt->ddt_object[type][class]);
 112  113  
 113  114          if (error)
 114  115                  return (error);
 115  116  
 116  117          error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
 117  118              sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
 118  119              &ddt->ddt_histogram[type][class]);
 119  120  
 120  121          /*
 121  122           * Seed the cached statistics.
 122  123           */
 123  124          VERIFY(ddt_object_info(ddt, type, class, &doi) == 0);
 124  125  
 125  126          ddo->ddo_count = ddt_object_count(ddt, type, class);
 126  127          ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
 127  128          ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
 128  129  
 129  130          ASSERT(error == 0);
 130  131          return (error);
 131  132  }
 132  133  
 133  134  static void
 134  135  ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 135  136      dmu_tx_t *tx)
 136  137  {
 137  138          ddt_object_t *ddo = &ddt->ddt_object_stats[type][class];
 138  139          dmu_object_info_t doi;
 139  140          char name[DDT_NAMELEN];
 140  141  
 141  142          ddt_object_name(ddt, type, class, name);
 142  143  
 143  144          VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
 144  145              sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
 145  146              &ddt->ddt_histogram[type][class], tx) == 0);
 146  147  
 147  148          /*
 148  149           * Cache DDT statistics; this is the only time they'll change.
 149  150           */
 150  151          VERIFY(ddt_object_info(ddt, type, class, &doi) == 0);
 151  152  
 152  153          ddo->ddo_count = ddt_object_count(ddt, type, class);
 153  154          ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
 154  155          ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
 155  156  }
 156  157  
 157  158  static int
 158  159  ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 159  160      ddt_entry_t *dde)
 160  161  {
 161  162          if (!ddt_object_exists(ddt, type, class))
 162  163                  return (ENOENT);
 163  164  
 164  165          return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
 165  166              ddt->ddt_object[type][class], dde));
 166  167  }
 167  168  
 168  169  static void
 169  170  ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 170  171      ddt_entry_t *dde)
 171  172  {
 172  173          if (!ddt_object_exists(ddt, type, class))
 173  174                  return;
 174  175  
 175  176          ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
 176  177              ddt->ddt_object[type][class], dde);
 177  178  }
 178  179  
 179  180  int
 180  181  ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 181  182      ddt_entry_t *dde, dmu_tx_t *tx)
 182  183  {
 183  184          ASSERT(ddt_object_exists(ddt, type, class));
 184  185  
 185  186          return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,
 186  187              ddt->ddt_object[type][class], dde, tx));
 187  188  }
 188  189  
 189  190  static int
 190  191  ddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 191  192      ddt_entry_t *dde, dmu_tx_t *tx)
 192  193  {
 193  194          ASSERT(ddt_object_exists(ddt, type, class));
 194  195  
 195  196          return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os,
 196  197              ddt->ddt_object[type][class], dde, tx));
 197  198  }
 198  199  
 199  200  int
 200  201  ddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 201  202      uint64_t *walk, ddt_entry_t *dde)
 202  203  {
 203  204          ASSERT(ddt_object_exists(ddt, type, class));
 204  205  
 205  206          return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
 206  207              ddt->ddt_object[type][class], dde, walk));
 207  208  }
 208  209  
 209  210  uint64_t
 210  211  ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
 211  212  {
 212  213          ASSERT(ddt_object_exists(ddt, type, class));
 213  214  
 214  215          return (ddt_ops[type]->ddt_op_count(ddt->ddt_os,
 215  216              ddt->ddt_object[type][class]));
 216  217  }
 217  218  
 218  219  int
 219  220  ddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 220  221      dmu_object_info_t *doi)
 221  222  {
 222  223          if (!ddt_object_exists(ddt, type, class))
 223  224                  return (ENOENT);
 224  225  
 225  226          return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class],
 226  227              doi));
 227  228  }
 228  229  
 229  230  boolean_t
 230  231  ddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
 231  232  {
 232  233          return (!!ddt->ddt_object[type][class]);
 233  234  }
 234  235  
 235  236  void
 236  237  ddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 237  238      char *name)
 238  239  {
 239  240          (void) sprintf(name, DMU_POOL_DDT,
 240  241              zio_checksum_table[ddt->ddt_checksum].ci_name,
 241  242              ddt_ops[type]->ddt_op_name, ddt_class_name[class]);
 242  243  }
 243  244  
 244  245  void
 245  246  ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg)
 246  247  {
 247  248          ASSERT(txg != 0);
 248  249  
 249  250          for (int d = 0; d < SPA_DVAS_PER_BP; d++)
 250  251                  bp->blk_dva[d] = ddp->ddp_dva[d];
 251  252          BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth);
 252  253  }
 253  254  
 254  255  void
 255  256  ddt_bp_create(enum zio_checksum checksum,
 256  257      const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp)
 257  258  {
 258  259          BP_ZERO(bp);
 259  260  
 260  261          if (ddp != NULL)
 261  262                  ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth);
 262  263  
 263  264          bp->blk_cksum = ddk->ddk_cksum;
 264  265          bp->blk_fill = 1;
 265  266  
 266  267          BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk));
 267  268          BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk));
 268  269          BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk));
 269  270          BP_SET_CHECKSUM(bp, checksum);
 270  271          BP_SET_TYPE(bp, DMU_OT_DEDUP);
 271  272          BP_SET_LEVEL(bp, 0);
 272  273          BP_SET_DEDUP(bp, 0);
 273  274          BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
 274  275  }
 275  276  
 276  277  void
 277  278  ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp)
 278  279  {
 279  280          ddk->ddk_cksum = bp->blk_cksum;
 280  281          ddk->ddk_prop = 0;
 281  282  
 282  283          DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp));
 283  284          DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp));
 284  285          DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp));
 285  286  }
 286  287  
 287  288  void
 288  289  ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp)
 289  290  {
 290  291          ASSERT(ddp->ddp_phys_birth == 0);
 291  292  
 292  293          for (int d = 0; d < SPA_DVAS_PER_BP; d++)
 293  294                  ddp->ddp_dva[d] = bp->blk_dva[d];
 294  295          ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp);
 295  296  }
 296  297  
 297  298  void
 298  299  ddt_phys_clear(ddt_phys_t *ddp)
 299  300  {
 300  301          bzero(ddp, sizeof (*ddp));
 301  302  }
 302  303  
 303  304  void
 304  305  ddt_phys_addref(ddt_phys_t *ddp)
 305  306  {
 306  307          ddp->ddp_refcnt++;
 307  308  }
 308  309  
 309  310  void
 310  311  ddt_phys_decref(ddt_phys_t *ddp)
 311  312  {
 312  313          ASSERT((int64_t)ddp->ddp_refcnt > 0);
 313  314          ddp->ddp_refcnt--;
 314  315  }
 315  316  
 316  317  void
 317  318  ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg)
 318  319  {
 319  320          blkptr_t blk;
 320  321  
 321  322          ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
 322  323          ddt_phys_clear(ddp);
 323  324          zio_free(ddt->ddt_spa, txg, &blk);
 324  325  }
 325  326  
 326  327  ddt_phys_t *
 327  328  ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp)
 328  329  {
 329  330          ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys;
 330  331  
 331  332          for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
 332  333                  if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) &&
 333  334                      BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth)
 334  335                          return (ddp);
 335  336          }
 336  337          return (NULL);
 337  338  }
 338  339  
 339  340  uint64_t
 340  341  ddt_phys_total_refcnt(const ddt_entry_t *dde)
 341  342  {
 342  343          uint64_t refcnt = 0;
 343  344  
 344  345          for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++)
 345  346                  refcnt += dde->dde_phys[p].ddp_refcnt;
 346  347  
 347  348          return (refcnt);
 348  349  }
 349  350  
 350  351  static void
 351  352  ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
 352  353  {
 353  354          spa_t *spa = ddt->ddt_spa;
 354  355          ddt_phys_t *ddp = dde->dde_phys;
 355  356          ddt_key_t *ddk = &dde->dde_key;
 356  357          uint64_t lsize = DDK_GET_LSIZE(ddk);
 357  358          uint64_t psize = DDK_GET_PSIZE(ddk);
 358  359  
 359  360          bzero(dds, sizeof (*dds));
 360  361  
 361  362          for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
 362  363                  uint64_t dsize = 0;
 363  364                  uint64_t refcnt = ddp->ddp_refcnt;
 364  365  
 365  366                  if (ddp->ddp_phys_birth == 0)
 366  367                          continue;
 367  368  
 368  369                  for (int d = 0; d < SPA_DVAS_PER_BP; d++)
 369  370                          dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
 370  371  
 371  372                  dds->dds_blocks += 1;
 372  373                  dds->dds_lsize += lsize;
 373  374                  dds->dds_psize += psize;
 374  375                  dds->dds_dsize += dsize;
 375  376  
 376  377                  dds->dds_ref_blocks += refcnt;
 377  378                  dds->dds_ref_lsize += lsize * refcnt;
 378  379                  dds->dds_ref_psize += psize * refcnt;
 379  380                  dds->dds_ref_dsize += dsize * refcnt;
 380  381          }
 381  382  }
 382  383  
 383  384  void
 384  385  ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg)
 385  386  {
 386  387          const uint64_t *s = (const uint64_t *)src;
 387  388          uint64_t *d = (uint64_t *)dst;
 388  389          uint64_t *d_end = (uint64_t *)(dst + 1);
 389  390  
 390  391          ASSERT(neg == 0 || neg == -1ULL);       /* add or subtract */
 391  392  
 392  393          while (d < d_end)
 393  394                  *d++ += (*s++ ^ neg) - neg;
 394  395  }
 395  396  
 396  397  static void
 397  398  ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
 398  399  {
 399  400          ddt_stat_t dds;
 400  401          ddt_histogram_t *ddh;
 401  402          int bucket;
 402  403  
 403  404          ddt_stat_generate(ddt, dde, &dds);
 404  405  
 405  406          bucket = highbit(dds.dds_ref_blocks) - 1;
 406  407          ASSERT(bucket >= 0);
 407  408  
 408  409          ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];
 409  410  
 410  411          ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg);
 411  412  }
 412  413  
 413  414  void
 414  415  ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
 415  416  {
 416  417          for (int h = 0; h < 64; h++)
 417  418                  ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0);
 418  419  }
 419  420  
 420  421  void
 421  422  ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh)
 422  423  {
 423  424          bzero(dds, sizeof (*dds));
 424  425  
 425  426          for (int h = 0; h < 64; h++)
 426  427                  ddt_stat_add(dds, &ddh->ddh_stat[h], 0);
 427  428  }
 428  429  
 429  430  boolean_t
 430  431  ddt_histogram_empty(const ddt_histogram_t *ddh)
 431  432  {
 432  433          const uint64_t *s = (const uint64_t *)ddh;
 433  434          const uint64_t *s_end = (const uint64_t *)(ddh + 1);
 434  435  
 435  436          while (s < s_end)
 436  437                  if (*s++ != 0)
 437  438                          return (B_FALSE);
 438  439  
 439  440          return (B_TRUE);
 440  441  }
 441  442  
 442  443  void
 443  444  ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total)
 444  445  {
 445  446          /* Sum the statistics we cached in ddt_object_sync(). */
 446  447          for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 447  448                  ddt_t *ddt = spa->spa_ddt[c];
 448  449                  for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
 449  450                          for (enum ddt_class class = 0; class < DDT_CLASSES;
 450  451                              class++) {
 451  452                                  ddt_object_t *ddo =
 452  453                                      &ddt->ddt_object_stats[type][class];
 453  454                                  ddo_total->ddo_count += ddo->ddo_count;
 454  455                                  ddo_total->ddo_dspace += ddo->ddo_dspace;
 455  456                                  ddo_total->ddo_mspace += ddo->ddo_mspace;
 456  457                          }
 457  458                  }
 458  459          }
 459  460  
 460  461          /* ... and compute the averages. */
 461  462          if (ddo_total->ddo_count != 0) {
 462  463                  ddo_total->ddo_dspace /= ddo_total->ddo_count;
 463  464                  ddo_total->ddo_mspace /= ddo_total->ddo_count;
 464  465          }
 465  466  }
 466  467  
 467  468  void
 468  469  ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
 469  470  {
 470  471          for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 471  472                  ddt_t *ddt = spa->spa_ddt[c];
 472  473                  for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
 473  474                          for (enum ddt_class class = 0; class < DDT_CLASSES;
 474  475                              class++) {
 475  476                                  ddt_histogram_add(ddh,
 476  477                                      &ddt->ddt_histogram_cache[type][class]);
 477  478                          }
 478  479                  }
 479  480          }
 480  481  }
 481  482  
 482  483  void
 483  484  ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
 484  485  {
 485  486          ddt_histogram_t *ddh_total;
 486  487  
 487  488          ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
 488  489          ddt_get_dedup_histogram(spa, ddh_total);
 489  490          ddt_histogram_stat(dds_total, ddh_total);
 490  491          kmem_free(ddh_total, sizeof (ddt_histogram_t));
 491  492  }
 492  493  
 493  494  uint64_t
 494  495  ddt_get_dedup_dspace(spa_t *spa)
 495  496  {
 496  497          ddt_stat_t dds_total = { 0 };
 497  498  
 498  499          ddt_get_dedup_stats(spa, &dds_total);
 499  500          return (dds_total.dds_ref_dsize - dds_total.dds_dsize);
 500  501  }
 501  502  
 502  503  uint64_t
 503  504  ddt_get_pool_dedup_ratio(spa_t *spa)
 504  505  {
 505  506          ddt_stat_t dds_total = { 0 };
 506  507  
 507  508          ddt_get_dedup_stats(spa, &dds_total);
 508  509          if (dds_total.dds_dsize == 0)
 509  510                  return (100);
 510  511  
 511  512          return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
 512  513  }
 513  514  
 514  515  int
 515  516  ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
 516  517  {
 517  518          spa_t *spa = ddt->ddt_spa;
 518  519          uint64_t total_refcnt = 0;
 519  520          uint64_t ditto = spa->spa_dedup_ditto;
 520  521          int total_copies = 0;
 521  522          int desired_copies = 0;
 522  523  
 523  524          for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
 524  525                  ddt_phys_t *ddp = &dde->dde_phys[p];
 525  526                  zio_t *zio = dde->dde_lead_zio[p];
 526  527                  uint64_t refcnt = ddp->ddp_refcnt;      /* committed refs */
 527  528                  if (zio != NULL)
 528  529                          refcnt += zio->io_parent_count; /* pending refs */
 529  530                  if (ddp == ddp_willref)
 530  531                          refcnt++;                       /* caller's ref */
 531  532                  if (refcnt != 0) {
 532  533                          total_refcnt += refcnt;
 533  534                          total_copies += p;
 534  535                  }
 535  536          }
 536  537  
 537  538          if (ditto == 0 || ditto > UINT32_MAX)
 538  539                  ditto = UINT32_MAX;
 539  540  
 540  541          if (total_refcnt >= 1)
 541  542                  desired_copies++;
 542  543          if (total_refcnt >= ditto)
 543  544                  desired_copies++;
 544  545          if (total_refcnt >= ditto * ditto)
 545  546                  desired_copies++;
 546  547  
 547  548          return (MAX(desired_copies, total_copies) - total_copies);
 548  549  }
 549  550  
 550  551  int
 551  552  ddt_ditto_copies_present(ddt_entry_t *dde)
 552  553  {
 553  554          ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO];
 554  555          dva_t *dva = ddp->ddp_dva;
 555  556          int copies = 0 - DVA_GET_GANG(dva);
 556  557  
 557  558          for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++)
 558  559                  if (DVA_IS_VALID(dva))
 559  560                          copies++;
 560  561  
 561  562          ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP);
 562  563  
 563  564          return (copies);
 564  565  }
 565  566  
 566  567  size_t
 567  568  ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
 568  569  {
 569  570          uchar_t *version = dst++;
 570  571          int cpfunc = ZIO_COMPRESS_ZLE;
 571  572          zio_compress_info_t *ci = &zio_compress_table[cpfunc];
 572  573          size_t c_len;
 573  574  
 574  575          ASSERT(d_len >= s_len + 1);     /* no compression plus version byte */
 575  576  
 576  577          c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level);
 577  578  
 578  579          if (c_len == s_len) {
 579  580                  cpfunc = ZIO_COMPRESS_OFF;
 580  581                  bcopy(src, dst, s_len);
 581  582          }
 582  583  
 583  584          *version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc;
 584  585  
 585  586          return (c_len + 1);
 586  587  }
 587  588  
 588  589  void
 589  590  ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
 590  591  {
 591  592          uchar_t version = *src++;
 592  593          int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK;
 593  594          zio_compress_info_t *ci = &zio_compress_table[cpfunc];
 594  595  
 595  596          if (ci->ci_decompress != NULL)
 596  597                  (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level);
 597  598          else
 598  599                  bcopy(src, dst, d_len);
 599  600  
 600  601          if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK)
 601  602                  byteswap_uint64_array(dst, d_len);
 602  603  }
 603  604  
 604  605  ddt_t *
 605  606  ddt_select_by_checksum(spa_t *spa, enum zio_checksum c)
 606  607  {
 607  608          return (spa->spa_ddt[c]);
 608  609  }
 609  610  
 610  611  ddt_t *
 611  612  ddt_select(spa_t *spa, const blkptr_t *bp)
 612  613  {
 613  614          return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]);
 614  615  }
 615  616  
 616  617  void
 617  618  ddt_enter(ddt_t *ddt)
 618  619  {
 619  620          mutex_enter(&ddt->ddt_lock);
 620  621  }
 621  622  
 622  623  void
 623  624  ddt_exit(ddt_t *ddt)
 624  625  {
 625  626          mutex_exit(&ddt->ddt_lock);
 626  627  }
 627  628  
 628  629  static ddt_entry_t *
 629  630  ddt_alloc(const ddt_key_t *ddk)
 630  631  {
 631  632          ddt_entry_t *dde;
 632  633  
 633  634          dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP);
 634  635          cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL);
 635  636  
 636  637          dde->dde_key = *ddk;
 637  638  
 638  639          return (dde);
 639  640  }
 640  641  
 641  642  static void
 642  643  ddt_free(ddt_entry_t *dde)
 643  644  {
 644  645          ASSERT(!dde->dde_loading);
 645  646  
 646  647          for (int p = 0; p < DDT_PHYS_TYPES; p++)
 647  648                  ASSERT(dde->dde_lead_zio[p] == NULL);
 648  649  
 649  650          if (dde->dde_repair_data != NULL)
 650  651                  zio_buf_free(dde->dde_repair_data,
 651  652                      DDK_GET_PSIZE(&dde->dde_key));
 652  653  
 653  654          cv_destroy(&dde->dde_cv);
 654  655          kmem_free(dde, sizeof (*dde));
 655  656  }
 656  657  
 657  658  void
 658  659  ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
 659  660  {
 660  661          ASSERT(MUTEX_HELD(&ddt->ddt_lock));
 661  662  
 662  663          avl_remove(&ddt->ddt_tree, dde);
 663  664          ddt_free(dde);
 664  665  }
 665  666  
 666  667  ddt_entry_t *
 667  668  ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
 668  669  {
 669  670          ddt_entry_t *dde, dde_search;
 670  671          enum ddt_type type;
 671  672          enum ddt_class class;
 672  673          avl_index_t where;
 673  674          int error;
 674  675  
 675  676          ASSERT(MUTEX_HELD(&ddt->ddt_lock));
 676  677  
 677  678          ddt_key_fill(&dde_search.dde_key, bp);
 678  679  
 679  680          dde = avl_find(&ddt->ddt_tree, &dde_search, &where);
 680  681          if (dde == NULL) {
 681  682                  if (!add)
 682  683                          return (NULL);
 683  684                  dde = ddt_alloc(&dde_search.dde_key);
 684  685                  avl_insert(&ddt->ddt_tree, dde, where);
 685  686          }
 686  687  
 687  688          while (dde->dde_loading)
 688  689                  cv_wait(&dde->dde_cv, &ddt->ddt_lock);
 689  690  
 690  691          if (dde->dde_loaded)
 691  692                  return (dde);
 692  693  
 693  694          dde->dde_loading = B_TRUE;
 694  695  
 695  696          ddt_exit(ddt);
 696  697  
 697  698          error = ENOENT;
 698  699  
 699  700          for (type = 0; type < DDT_TYPES; type++) {
 700  701                  for (class = 0; class < DDT_CLASSES; class++) {
 701  702                          error = ddt_object_lookup(ddt, type, class, dde);
 702  703                          if (error != ENOENT)
 703  704                                  break;
 704  705                  }
 705  706                  if (error != ENOENT)
 706  707                          break;
 707  708          }
 708  709  
 709  710          ASSERT(error == 0 || error == ENOENT);
 710  711  
 711  712          ddt_enter(ddt);
 712  713  
 713  714          ASSERT(dde->dde_loaded == B_FALSE);
 714  715          ASSERT(dde->dde_loading == B_TRUE);
 715  716  
 716  717          dde->dde_type = type;   /* will be DDT_TYPES if no entry found */
 717  718          dde->dde_class = class; /* will be DDT_CLASSES if no entry found */
 718  719          dde->dde_loaded = B_TRUE;
 719  720          dde->dde_loading = B_FALSE;
 720  721  
 721  722          if (error == 0)
 722  723                  ddt_stat_update(ddt, dde, -1ULL);
 723  724  
 724  725          cv_broadcast(&dde->dde_cv);
 725  726  
 726  727          return (dde);
 727  728  }
 728  729  
 729  730  void
 730  731  ddt_prefetch(spa_t *spa, const blkptr_t *bp)
 731  732  {
 732  733          ddt_t *ddt;
 733  734          ddt_entry_t dde;
 734  735  
 735  736          if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp))
 736  737                  return;
 737  738  
 738  739          /*
 739  740           * We only remove the DDT once all tables are empty and only
 740  741           * prefetch dedup blocks when there are entries in the DDT.
 741  742           * Thus no locking is required as the DDT can't disappear on us.
 742  743           */
 743  744          ddt = ddt_select(spa, bp);
 744  745          ddt_key_fill(&dde.dde_key, bp);
 745  746  
 746  747          for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
 747  748                  for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
 748  749                          ddt_object_prefetch(ddt, type, class, &dde);
 749  750                  }
 750  751          }
 751  752  }
 752  753  
 753  754  int
 754  755  ddt_entry_compare(const void *x1, const void *x2)
 755  756  {
 756  757          const ddt_entry_t *dde1 = x1;
 757  758          const ddt_entry_t *dde2 = x2;
 758  759          const uint64_t *u1 = (const uint64_t *)&dde1->dde_key;
 759  760          const uint64_t *u2 = (const uint64_t *)&dde2->dde_key;
 760  761  
 761  762          for (int i = 0; i < DDT_KEY_WORDS; i++) {
 762  763                  if (u1[i] < u2[i])
 763  764                          return (-1);
 764  765                  if (u1[i] > u2[i])
 765  766                          return (1);
 766  767          }
 767  768  
 768  769          return (0);
 769  770  }
 770  771  
 771  772  static ddt_t *
 772  773  ddt_table_alloc(spa_t *spa, enum zio_checksum c)
 773  774  {
 774  775          ddt_t *ddt;
 775  776  
 776  777          ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP);
 777  778  
 778  779          mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL);
 779  780          avl_create(&ddt->ddt_tree, ddt_entry_compare,
 780  781              sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
 781  782          avl_create(&ddt->ddt_repair_tree, ddt_entry_compare,
 782  783              sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
 783  784          ddt->ddt_checksum = c;
 784  785          ddt->ddt_spa = spa;
 785  786          ddt->ddt_os = spa->spa_meta_objset;
 786  787  
 787  788          return (ddt);
 788  789  }
 789  790  
           /*
            * Tear down a table built by ddt_table_alloc().  Both AVL trees are
            * asserted to be empty before the trees and the lock are destroyed
            * and the table's memory is released.
            */
 790  791  static void
 791  792  ddt_table_free(ddt_t *ddt)
 792  793  {
 793  794          ASSERT(avl_numnodes(&ddt->ddt_tree) == 0);
 794  795          ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0);
 795  796          avl_destroy(&ddt->ddt_tree);
 796  797          avl_destroy(&ddt->ddt_repair_tree);
 797  798          mutex_destroy(&ddt->ddt_lock);
 798  799          kmem_free(ddt, sizeof (*ddt));
 799  800  }
 800  801  
           /*
            * Set the pool's dedup checksum to ZIO_DEDUPCHECKSUM and allocate one
            * in-core dedup table per checksum function, stored in spa_ddt[]
            * indexed by checksum.
            */
 801  802  void
 802  803  ddt_create(spa_t *spa)
 803  804  {
 804  805          spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM;
 805  806  
 806  807          for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++)
 807  808                  spa->spa_ddt[c] = ddt_table_alloc(spa, c);
 808  809  }
 809  810  
           /*
            * Create the in-core dedup tables for 'spa' and load their on-disk
            * state.
            *
            * Returns 0 on success, or the first error reported by zap_lookup()
            * or ddt_object_load().  ENOENT from either call is not treated as a
            * failure.  The tables allocated by ddt_create() are left in place on
            * every return path.
            */
 810  811  int
 811  812  ddt_load(spa_t *spa)
 812  813  {
 813  814          int error;
 814  815  
 815  816          ddt_create(spa);
 816  817  
                   /* Find the DDT stats object through the pool directory. */
 817  818          error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 818  819              DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
 819  820              &spa->spa_ddt_stat_object);
 820  821  
                   /*
                    * A missing directory entry is not an error: nothing is loaded
                    * and the freshly created, empty tables are used as-is.
                    */
 821  822          if (error)
 822  823                  return (error == ENOENT ? 0 : error);
 823  824  
 824  825          for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 825  826                  ddt_t *ddt = spa->spa_ddt[c];
 826  827                  for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
 827  828                          for (enum ddt_class class = 0; class < DDT_CLASSES;
 828  829                              class++) {
 829  830                                  error = ddt_object_load(ddt, type, class);
 830  831                                  if (error != 0 && error != ENOENT)
 831  832                                          return (error);
 832  833                          }
 833  834                  }
 834  835  
 835  836                  /*
 836  837                   * Seed the cached histograms.
 837  838                   */
 838  839                  bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache,
 839  840                      sizeof (ddt->ddt_histogram));
 840  841          }
 841  842  
 842  843          return (0);
 843  844  }
 844  845  
           /*
            * Free every in-core dedup table of 'spa' and clear its spa_ddt[]
            * slot.  Slots that are already NULL are skipped, so the function can
            * be called when no tables (or only some) exist.
            */
 845  846  void
 846  847  ddt_unload(spa_t *spa)
 847  848  {
 848  849          for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 849  850                  if (spa->spa_ddt[c]) {
 850  851                          ddt_table_free(spa->spa_ddt[c]);
 851  852                          spa->spa_ddt[c] = NULL;
 852  853                  }
 853  854          }
 854  855  }
 855  856  
           /*
            * Report whether block pointer 'bp' is a dedup block whose entry lives
            * in a DDT class numbered 'max_class' or lower.
            *
            * Returns B_FALSE when bp does not have its dedup property set, or
            * when no object of any type in classes 0..max_class contains bp's
            * key.  Returns B_TRUE as soon as one lookup succeeds.
            */
 856  857  boolean_t
 857  858  ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp)
 858  859  {
 859  860          ddt_t *ddt;
 860  861          ddt_entry_t dde;
 861  862  
 862  863          if (!BP_GET_DEDUP(bp))
 863  864                  return (B_FALSE);
 864  865  
                   /*
                    * Shortcut that skips the lookups entirely.
                    * NOTE(review): presumably DDT_CLASS_UNIQUE is the highest
                    * class, so every dedup bp qualifies -- confirm against the
                    * enum ddt_class definition.
                    */
 865  866          if (max_class == DDT_CLASS_UNIQUE)
 866  867                  return (B_TRUE);
 867  868  
 868  869          ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)];
 869  870  
 870  871          ddt_key_fill(&dde.dde_key, bp);
 871  872  
 872  873          for (enum ddt_type type = 0; type < DDT_TYPES; type++)
 873  874                  for (enum ddt_class class = 0; class <= max_class; class++)
 874  875                          if (ddt_object_lookup(ddt, type, class, &dde) == 0)
 875  876                                  return (B_TRUE);
 876  877  
 877  878          return (B_FALSE);
 878  879  }
 879  880  
           /*
            * Allocate a ddt entry keyed by 'bp' and try to fill it from the
            * on-disk objects of every class except DDT_CLASS_UNIQUE.
            *
            * The entry is always returned.  If no lookup succeeds, its dde_phys
            * array is zeroed before returning.  Within this file the returned
            * entry is handed to ddt_repair_done(), which either queues or frees
            * it.
            */
 880  881  ddt_entry_t *
 881  882  ddt_repair_start(ddt_t *ddt, const blkptr_t *bp)
 882  883  {
 883  884          ddt_key_t ddk;
 884  885          ddt_entry_t *dde;
 885  886  
 886  887          ddt_key_fill(&ddk, bp);
 887  888  
 888  889          dde = ddt_alloc(&ddk);
 889  890  
 890  891          for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
 891  892                  for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
 892  893                          /*
 893  894                           * We can only do repair if there are multiple copies
 894  895                           * of the block.  For anything in the UNIQUE class,
 895  896                           * there's definitely only one copy, so don't even try.
 896  897                           */
 897  898                          if (class != DDT_CLASS_UNIQUE &&
 898  899                              ddt_object_lookup(ddt, type, class, dde) == 0)
 899  900                                  return (dde);
 900  901                  }
 901  902          }
 902  903  
                   /* Nothing found: hand back an entry with empty phys slots. */
 903  904          bzero(dde->dde_phys, sizeof (dde->dde_phys));
 904  905  
 905  906          return (dde);
 906  907  }
 907  908  
           /*
            * Finish with an entry obtained from ddt_repair_start().
            *
            * Between ddt_enter() and ddt_exit(), the entry is inserted into the
            * repair tree only if all of the following hold: it carries repair
            * data, the pool is writeable, and no entry with the same key is
            * already in the repair tree.  Otherwise the entry is freed.  Either
            * way the caller must not use 'dde' afterwards.
            */
 908  909  void
 909  910  ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde)
 910  911  {
 911  912          avl_index_t where;
 912  913  
 913  914          ddt_enter(ddt);
 914  915  
 915  916          if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) &&
 916  917              avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL)
 917  918                  avl_insert(&ddt->ddt_repair_tree, dde, where);
 918  919          else
 919  920                  ddt_free(dde);
 920  921  
 921  922          ddt_exit(ddt);
 922  923  }
 923  924  
           /*
            * Done callback for the null zio created in ddt_repair_entry(): frees
            * the repair entry that was passed as the zio's private data.
            */
 924  925  static void
 925  926  ddt_repair_entry_done(zio_t *zio)
 926  927  {
 927  928          ddt_entry_t *rdde = zio->io_private;
 928  929  
 929  930          ddt_free(rdde);
 930  931  }
 931  932  
           /*
            * Issue rewrites of the repair data held in 'rdde' for every physical
            * slot of 'dde' that still matches it.
            *
            * ddt  - table the entries belong to (supplies the checksum).
            * dde  - current entry, as returned by ddt_repair_start().
            * rdde - queued repair entry holding dde_repair_data; it is freed by
            *        ddt_repair_entry_done() when the parent zio created here
            *        completes.
            * rio  - zio under which the repair I/O is issued.
            */
 932  933  static void
 933  934  ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
 934  935  {
 935  936          ddt_phys_t *ddp = dde->dde_phys;
 936  937          ddt_phys_t *rddp = rdde->dde_phys;
 937  938          ddt_key_t *ddk = &dde->dde_key;
 938  939          ddt_key_t *rddk = &rdde->dde_key;
 939  940          zio_t *zio;
 940  941          blkptr_t blk;
 941  942  
                   /* Parent for the rewrites below; its done callback frees rdde. */
 942  943          zio = zio_null(rio, rio->io_spa, NULL,
 943  944              ddt_repair_entry_done, rdde, rio->io_flags);
 944  945  
 945  946          for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) {
                           /*
                            * Skip slots that are unused (birth of 0) or whose birth
                            * txg or DVAs differ from those recorded in rdde.
                            */
 946  947                  if (ddp->ddp_phys_birth == 0 ||
 947  948                      ddp->ddp_phys_birth != rddp->ddp_phys_birth ||
 948  949                      bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva)))
 949  950                          continue;
 950  951                  ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
 951  952                  zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk,
 952  953                      rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL,
 953  954                      ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL));
 954  955          }
 955  956  
 956  957          zio_nowait(zio);
 957  958  }
 958  959  
           /*
            * Process every entry queued in the table's repair tree, issuing the
            * repair I/O under 'rio'.  Does nothing when spa_sync_pass() reports a
            * pass greater than 1.
            */
 959  960  static void
 960  961  ddt_repair_table(ddt_t *ddt, zio_t *rio)
 961  962  {
 962  963          spa_t *spa = ddt->ddt_spa;
 963  964          ddt_entry_t *dde, *rdde_next, *rdde;
 964  965          avl_tree_t *t = &ddt->ddt_repair_tree;
 965  966          blkptr_t blk;
 966  967  
 967  968          if (spa_sync_pass(spa) > 1)
 968  969                  return;
 969  970  
 970  971          ddt_enter(ddt);
 971  972          for (rdde = avl_first(t); rdde != NULL; rdde = rdde_next) {
                           /* Fetch the successor before rdde leaves the tree. */
 972  973                  rdde_next = AVL_NEXT(t, rdde);
 973  974                  avl_remove(&ddt->ddt_repair_tree, rdde);
                           /*
                            * ddt_exit() is called before the repair work and
                            * ddt_enter() again afterwards; ddt_repair_done() does
                            * its own ddt_enter()/ddt_exit().
                            */
 974  975                  ddt_exit(ddt);
 975  976                  ddt_bp_create(ddt->ddt_checksum, &rdde->dde_key, NULL, &blk);
 976  977                  dde = ddt_repair_start(ddt, &blk);
 977  978                  ddt_repair_entry(ddt, dde, rdde, rio);
 978  979                  ddt_repair_done(ddt, dde);
 979  980                  ddt_enter(ddt);
 980  981          }
 981  982          ddt_exit(ddt);
 982  983  }
 983  984  
           /*
            * Write one in-core entry back to the on-disk DDT objects.
            *
            * Unreferenced physical slots are freed, the entry's new class is
            * computed, the entry is removed from its old object when its type or
            * class changed or it has no references left, and it is stored in the
            * object for its new type/class while references remain.
            *
            * ddt - table the entry belongs to.
            * dde - entry to sync; must be loaded and not in the middle of
            *       loading.
            * tx  - transaction the object updates are made in.
            * txg - transaction group passed to ddt_phys_free().
            */
 984  985  static void
 985  986  ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
 986  987  {
 987  988          dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool;
 988  989          ddt_phys_t *ddp = dde->dde_phys;
 989  990          ddt_key_t *ddk = &dde->dde_key;
 990  991          enum ddt_type otype = dde->dde_type;
 991  992          enum ddt_type ntype = DDT_TYPE_CURRENT;
 992  993          enum ddt_class oclass = dde->dde_class;
 993  994          enum ddt_class nclass;
 994  995          uint64_t total_refcnt = 0;
 995  996  
 996  997          ASSERT(dde->dde_loaded);
 997  998          ASSERT(!dde->dde_loading);
 998  999  
                   /*
                    * Free physical slots that are no longer needed and total the
                    * reference counts of the non-ditto slots.
                    */
 999 1000          for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1000 1001                  ASSERT(dde->dde_lead_zio[p] == NULL);
1001 1002                  ASSERT((int64_t)ddp->ddp_refcnt >= 0);
1002 1003                  if (ddp->ddp_phys_birth == 0) {
1003 1004                          ASSERT(ddp->ddp_refcnt == 0);
1004 1005                          continue;
1005 1006                  }
                           /*
                            * The ditto slot is freed when no ditto copies are
                            * needed, and it never contributes to total_refcnt.
                            */
1006 1007                  if (p == DDT_PHYS_DITTO) {
1007 1008                          if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0)
1008 1009                                  ddt_phys_free(ddt, ddk, ddp, txg);
1009 1010                          continue;
1010 1011                  }
1011 1012                  if (ddp->ddp_refcnt == 0)
1012 1013                          ddt_phys_free(ddt, ddk, ddp, txg);
1013 1014                  total_refcnt += ddp->ddp_refcnt;
1014 1015          }
1015 1016  
                   /* Choose the class the entry belongs to after this sync. */
1016 1017          if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0)
1017 1018                  nclass = DDT_CLASS_DITTO;
1018 1019          else if (total_refcnt > 1)
1019 1020                  nclass = DDT_CLASS_DUPLICATE;
1020 1021          else
1021 1022                  nclass = DDT_CLASS_UNIQUE;
1022 1023  
                   /*
                    * Remove the entry from its old object if it is moving to a
                    * different type/class or has no references left.
                    * NOTE(review): otype == DDT_TYPES presumably marks an entry
                    * that was never stored in an on-disk object -- confirm.
                    */
1023 1024          if (otype != DDT_TYPES &&
1024 1025              (otype != ntype || oclass != nclass || total_refcnt == 0)) {
1025 1026                  VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0);
1026 1027                  ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT);
1027 1028          }
1028 1029  
1029 1030          if (total_refcnt != 0) {
1030 1031                  dde->dde_type = ntype;
1031 1032                  dde->dde_class = nclass;
1032 1033                  ddt_stat_update(ddt, dde, 0);
                           /* Create the destination object on first use. */
1033 1034                  if (!ddt_object_exists(ddt, ntype, nclass))
1034 1035                          ddt_object_create(ddt, ntype, nclass, tx);
1035 1036                  VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0);
1036 1037  
1037 1038                  /*
1038 1039                   * If the class changes, the order that we scan this bp
1039 1040                   * changes.  If it decreases, we could miss it, so
1040 1041                   * scan it right now.  (This covers both class changing
1041 1042                   * while we are doing ddt_walk(), and when we are
1042 1043                   * traversing.)
1043 1044                   */
1044 1045                  if (nclass < oclass) {
1045 1046                          dsl_scan_ddt_entry(dp->dp_scan,
1046 1047                              ddt->ddt_checksum, dde, tx);
1047 1048                  }
1048 1049          }
1049 1050  }
1050 1051  
           /*
            * Sync every entry in the table's in-core tree to the on-disk DDT
            * objects, then sync those objects and refresh the cached histogram.
            * Returns immediately when the in-core tree is empty.
            *
            * ddt - table to sync.
            * tx  - transaction the updates are made in.
            * txg - transaction group passed through to ddt_sync_entry().
            */
1051 1052  static void
1052 1053  ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
1053 1054  {
  
    | 
      ↓ open down ↓ | 
    1020 lines elided | 
    
      ↑ open up ↑ | 
  
1054 1055          spa_t *spa = ddt->ddt_spa;
1055 1056          ddt_entry_t *dde;
1056 1057          void *cookie = NULL;
1057 1058  
1058 1059          if (avl_numnodes(&ddt->ddt_tree) == 0)
1059 1060                  return;
1060 1061  
1061 1062          ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
1062 1063  
                   /*
                    * Create the DDT stats object the first time it is needed and
                    * record it under DMU_POOL_DDT_STATS in the pool directory
                    * object.
                    */
1063 1064          if (spa->spa_ddt_stat_object == 0) {
1064      -                spa->spa_ddt_stat_object = zap_create(ddt->ddt_os,
1065      -                    DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx);
1066      -                VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT,
1067      -                    DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
1068      -                    &spa->spa_ddt_stat_object, tx) == 0);
     1065 +                spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
     1066 +                    DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT,
     1067 +                    DMU_POOL_DDT_STATS, tx);
1069 1068          }
1070 1069  
                   /* Drain the in-core tree, syncing and freeing each entry. */
1071 1070          while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
1072 1071                  ddt_sync_entry(ddt, dde, tx, txg);
1073 1072                  ddt_free(dde);
1074 1073          }
1075 1074  
                   /*
                    * For each type, sync every existing class object and total
                    * their entry counts; when the total for a type is zero,
                    * destroy all of that type's objects.
                    */
1076 1075          for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
1077 1076                  uint64_t count = 0;
1078 1077                  for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
1079 1078                          if (ddt_object_exists(ddt, type, class)) {
1080 1079                                  ddt_object_sync(ddt, type, class, tx);
1081 1080                                  count += ddt_object_count(ddt, type, class);
1082 1081                          }
1083 1082                  }
1084 1083                  for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
1085 1084                          if (count == 0 && ddt_object_exists(ddt, type, class))
1086 1085                                  ddt_object_destroy(ddt, type, class, tx);
1087 1086                  }
1088 1087          }
1089 1088  
                   /* Refresh the cached histogram from the live one. */
1090 1089          bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache,
1091 1090              sizeof (ddt->ddt_histogram));
1092 1091  }
1093 1092  
           /*
            * Sync all dedup tables of 'spa' for transaction group 'txg', which
            * must be the txg currently being synced.
            *
            * For each existing table the in-core entries are synced and then the
            * queued repairs are issued under a root zio created with
            * ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE.  The function waits for
            * that zio before committing the transaction; the result of the wait
            * is deliberately ignored.
            */
1094 1093  void
1095 1094  ddt_sync(spa_t *spa, uint64_t txg)
1096 1095  {
1097 1096          dmu_tx_t *tx;
1098 1097          zio_t *rio = zio_root(spa, NULL, NULL,
1099 1098              ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
1100 1099  
1101 1100          ASSERT(spa_syncing_txg(spa) == txg);
1102 1101  
1103 1102          tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
1104 1103  
1105 1104          for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
1106 1105                  ddt_t *ddt = spa->spa_ddt[c];
1107 1106                  if (ddt == NULL)
1108 1107                          continue;
1109 1108                  ddt_sync_table(ddt, tx, txg);
1110 1109                  ddt_repair_table(ddt, rio);
1111 1110          }
1112 1111  
1113 1112          (void) zio_wait(rio);
1114 1113  
1115 1114          dmu_tx_commit(tx);
1116 1115  }
1117 1116  
           /*
            * Fetch the next on-disk DDT entry at or after bookmark 'ddb' into
            * 'dde', advancing the bookmark.
            *
            * The iteration order is: cursor within one object, then checksum
            * (innermost loop), then type, then class (outermost loop).  Objects
            * that do not exist are treated as exhausted.
            *
            * Returns 0 when an entry was produced, ENOENT when every
            * class/type/checksum combination has been exhausted, or any other
            * error returned by ddt_object_walk().  dde_type and dde_class are set
            * from the bookmark on every attempt, including failed ones.
            */
1118 1117  int
1119 1118  ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde)
1120 1119  {
1121 1120          do {
1122 1121                  do {
1123 1122                          do {
1124 1123                                  ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum];
1125 1124                                  int error = ENOENT;
1126 1125                                  if (ddt_object_exists(ddt, ddb->ddb_type,
1127 1126                                      ddb->ddb_class)) {
1128 1127                                          error = ddt_object_walk(ddt,
1129 1128                                              ddb->ddb_type, ddb->ddb_class,
1130 1129                                              &ddb->ddb_cursor, dde);
1131 1130                                  }
1132 1131                                  dde->dde_type = ddb->ddb_type;
1133 1132                                  dde->dde_class = ddb->ddb_class;
1134 1133                                  if (error == 0)
1135 1134                                          return (0);
1136 1135                                  if (error != ENOENT)
1137 1136                                          return (error);
                                           /* Object exhausted: restart the cursor. */
1138 1137                                  ddb->ddb_cursor = 0;
1139 1138                          } while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS);
1140 1139                          ddb->ddb_checksum = 0;
1141 1140                  } while (++ddb->ddb_type < DDT_TYPES);
1142 1141                  ddb->ddb_type = 0;
1143 1142          } while (++ddb->ddb_class < DDT_CLASSES);
1144 1143  
1145 1144          return (ENOENT);
1146 1145  }
  
    | 
      ↓ open down ↓ | 
    68 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX