NEX-4582 update wrc test cases to allow use of write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>

--- old/usr/src/uts/common/fs/zfs/space_map.c
+++ new/usr/src/uts/common/fs/zfs/space_map.c
↓ open down ↓ 38 lines elided ↑ open up ↑
  39   39  
  40   40  /*
  41   41   * The data for a given space map can be kept on blocks of any size.
  42   42   * Larger blocks entail fewer i/o operations, but they also cause the
  43   43   * DMU to keep more data in-core, and also to waste more i/o bandwidth
  44   44   * when only a few blocks have changed since the last transaction group.
  45   45   */
  46   46  int space_map_blksz = (1 << 12);
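For scale (an illustrative aside, not part of the change): at the default 4 KB block size above, one space map block holds 512 eight-byte entries, so a larger block means fewer reads at the cost of more in-core data. A minimal user-land sketch of that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint64_t blksz = 1ULL << 12;                    /* default above */
            uint64_t entries = blksz / sizeof (uint64_t);   /* 512 */

            (void) printf("%llu entries per block\n",
                (unsigned long long)entries);
            return (0);
    }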
  47   47  
  48   48  /*
  49      - * Iterate through the space map, invoking the callback on each (non-debug)
  50      - * space map entry.
        49 + * Load the space map from disk into the specified range tree. Segments of
        50 + * maptype are added to the range tree; other segment types are removed.
       51 + *
       52 + * Note: space_map_load() will drop sm_lock across dmu_read() calls.
       53 + * The caller must be OK with this.
  51   54   */
  52   55  int
  53      -space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
       56 +space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
  54   57  {
  55   58          uint64_t *entry, *entry_map, *entry_map_end;
  56      -        uint64_t bufsize, size, offset, end;
       59 +        uint64_t bufsize, size, offset, end, space;
  57   60          int error = 0;
  58   61  
       62 +        ASSERT(MUTEX_HELD(sm->sm_lock));
       63 +
  59   64          end = space_map_length(sm);
       65 +        space = space_map_allocated(sm);
  60   66  
       67 +        VERIFY0(range_tree_space(rt));
       68 +
       69 +        if (maptype == SM_FREE) {
       70 +                range_tree_add(rt, sm->sm_start, sm->sm_size);
       71 +                space = sm->sm_size - space;
       72 +        }
       73 +
  61   74          bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
  62   75          entry_map = zio_buf_alloc(bufsize);
  63   76  
       77 +        mutex_exit(sm->sm_lock);
  64   78          if (end > bufsize) {
  65   79                  dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
  66   80                      end - bufsize, ZIO_PRIORITY_SYNC_READ);
  67   81          }
       82 +        mutex_enter(sm->sm_lock);
  68   83  
  69      -        for (offset = 0; offset < end && error == 0; offset += bufsize) {
       84 +        for (offset = 0; offset < end; offset += bufsize) {
  70   85                  size = MIN(end - offset, bufsize);
  71   86                  VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0);
  72   87                  VERIFY(size != 0);
  73   88                  ASSERT3U(sm->sm_blksz, !=, 0);
  74   89  
  75   90                  dprintf("object=%llu  offset=%llx  size=%llx\n",
  76   91                      space_map_object(sm), offset, size);
  77   92  
       93 +                mutex_exit(sm->sm_lock);
  78   94                  error = dmu_read(sm->sm_os, space_map_object(sm), offset, size,
  79   95                      entry_map, DMU_READ_PREFETCH);
       96 +                mutex_enter(sm->sm_lock);
  80   97                  if (error != 0)
  81   98                          break;
  82   99  
  83  100                  entry_map_end = entry_map + (size / sizeof (uint64_t));
  84      -                for (entry = entry_map; entry < entry_map_end && error == 0;
  85      -                    entry++) {
      101 +                for (entry = entry_map; entry < entry_map_end; entry++) {
  86  102                          uint64_t e = *entry;
  87  103                          uint64_t offset, size;
  88  104  
  89      -                        if (SM_DEBUG_DECODE(e)) /* Skip debug entries */
      105 +                        if (SM_DEBUG_DECODE(e))         /* Skip debug entries */
  90  106                                  continue;
  91  107  
  92  108                          offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) +
  93  109                              sm->sm_start;
  94  110                          size = SM_RUN_DECODE(e) << sm->sm_shift;
  95  111  
  96  112                          VERIFY0(P2PHASE(offset, 1ULL << sm->sm_shift));
  97  113                          VERIFY0(P2PHASE(size, 1ULL << sm->sm_shift));
  98  114                          VERIFY3U(offset, >=, sm->sm_start);
  99  115                          VERIFY3U(offset + size, <=, sm->sm_start + sm->sm_size);
 100      -                        error = callback(SM_TYPE_DECODE(e), offset, size, arg);
      116 +                        if (SM_TYPE_DECODE(e) == maptype) {
      117 +                                VERIFY3U(range_tree_space(rt) + size, <=,
      118 +                                    sm->sm_size);
      119 +                                range_tree_add(rt, offset, size);
      120 +                        } else {
      121 +                                range_tree_remove(rt, offset, size);
      122 +                        }
 101  123                  }
 102  124          }
 103  125  
 104      -        zio_buf_free(entry_map, bufsize);
 105      -        return (error);
 106      -}
 107      -
 108      -typedef struct space_map_load_arg {
 109      -        space_map_t     *smla_sm;
 110      -        range_tree_t    *smla_rt;
 111      -        maptype_t       smla_type;
 112      -} space_map_load_arg_t;
 113      -
 114      -static int
 115      -space_map_load_callback(maptype_t type, uint64_t offset, uint64_t size,
 116      -    void *arg)
 117      -{
 118      -        space_map_load_arg_t *smla = arg;
 119      -        if (type == smla->smla_type) {
 120      -                VERIFY3U(range_tree_space(smla->smla_rt) + size, <=,
 121      -                    smla->smla_sm->sm_size);
 122      -                range_tree_add(smla->smla_rt, offset, size);
 123      -        } else {
 124      -                range_tree_remove(smla->smla_rt, offset, size);
 125      -        }
 126      -
 127      -        return (0);
 128      -}
 129      -
 130      -/*
 131      - * Load the space map disk into the specified range tree. Segments of maptype
 132      - * are added to the range tree, other segment types are removed.
 133      - */
 134      -int
 135      -space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
 136      -{
 137      -        uint64_t space;
 138      -        int err;
 139      -        space_map_load_arg_t smla;
 140      -
 141      -        VERIFY0(range_tree_space(rt));
 142      -        space = space_map_allocated(sm);
 143      -
 144      -        if (maptype == SM_FREE) {
 145      -                range_tree_add(rt, sm->sm_start, sm->sm_size);
 146      -                space = sm->sm_size - space;
 147      -        }
 148      -
 149      -        smla.smla_rt = rt;
 150      -        smla.smla_sm = sm;
 151      -        smla.smla_type = maptype;
 152      -        err = space_map_iterate(sm, space_map_load_callback, &smla);
 153      -
 154      -        if (err == 0) {
      126 +        if (error == 0)
 155  127                  VERIFY3U(range_tree_space(rt), ==, space);
 156      -        } else {
      128 +        else
 157  129                  range_tree_vacate(rt, NULL, NULL);
 158      -        }
 159  130  
 160      -        return (err);
      131 +        zio_buf_free(entry_map, bufsize);
      132 +        return (error);
 161  133  }
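The decode loop above unpacks each 64-bit word with the SM_*_DECODE macros from sys/space_map.h. A minimal user-land sketch of that decode, assuming the pre-v2 one-word layout (bit 63 debug flag, bits 62..16 offset, bit 15 alloc/free type, bits 14..0 run length stored as length minus one); offset and run come out in sm_shift units, which the loop then scales:

    #include <stdint.h>

    /* Assumed layout; mirrors SM_*_DECODE for illustration only. */
    static void
    sm_entry_decode(uint64_t e, int *debug, uint64_t *offset, int *type,
        uint64_t *run)
    {
            *debug = (int)(e >> 63);                    /* SM_DEBUG_DECODE */
            *offset = (e >> 16) & ((1ULL << 47) - 1);   /* SM_OFFSET_DECODE */
            *type = (int)((e >> 15) & 1);               /* SM_TYPE_DECODE */
            *run = (e & 0x7fffULL) + 1;                 /* SM_RUN_DECODE */
    }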
 162  134  
 163  135  void
 164  136  space_map_histogram_clear(space_map_t *sm)
 165  137  {
 166  138          if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
 167  139                  return;
 168  140  
 169  141          bzero(sm->sm_phys->smp_histogram, sizeof (sm->sm_phys->smp_histogram));
 170  142  }
↓ open down ↓ 10 lines elided ↑ open up ↑
 181  153                          return (B_FALSE);
 182  154          }
 183  155          return (B_TRUE);
 184  156  }
 185  157  
 186  158  void
 187  159  space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
 188  160  {
 189  161          int idx = 0;
 190  162  
      163 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 191  164          ASSERT(dmu_tx_is_syncing(tx));
 192  165          VERIFY3U(space_map_object(sm), !=, 0);
 193  166  
 194  167          if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
 195  168                  return;
 196  169  
 197  170          dmu_buf_will_dirty(sm->sm_dbuf, tx);
 198  171  
 199  172          ASSERT(space_map_histogram_verify(sm, rt));
 200  173          /*
↓ open down ↓ 48 lines elided ↑ open up ↑
 249  222           * Traverse the range tree and calculate the number of space map
 250  223           * entries that would be required to write out the range tree.
 251  224           */
 252  225          for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
 253  226                  size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
 254  227                  entries += howmany(size, SM_RUN_MAX);
 255  228          }
 256  229          return (entries);
 257  230  }
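Because a single entry's run field saturates at SM_RUN_MAX, a long segment is split across several entries, and howmany() is a round-up division. A hedged sketch of the same computation, assuming SM_RUN_MAX is 0x8000 (a 15-bit run stored as length minus one):

    #include <stdint.h>

    #define ASSUMED_SM_RUN_MAX      0x8000ULL   /* stand-in for SM_RUN_MAX */

    /* Entries needed for one segment [rs_start, rs_end). */
    static uint64_t
    sm_seg_entries(uint64_t rs_start, uint64_t rs_end, uint8_t sm_shift)
    {
            uint64_t size = (rs_end - rs_start) >> sm_shift;

            /* howmany(size, SM_RUN_MAX): divide, rounding up */
            return ((size + ASSUMED_SM_RUN_MAX - 1) / ASSUMED_SM_RUN_MAX);
    }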
 258  231  
      232 +/*
      233 + * Note: space_map_write() will drop sm_lock across dmu_write() calls.
      234 + */
 259  235  void
 260  236  space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
 261  237      dmu_tx_t *tx)
 262  238  {
 263  239          objset_t *os = sm->sm_os;
 264  240          spa_t *spa = dmu_objset_spa(os);
 265  241          avl_tree_t *t = &rt->rt_root;
 266  242          range_seg_t *rs;
 267  243          uint64_t size, total, rt_space, nodes;
 268  244          uint64_t *entry, *entry_map, *entry_map_end;
 269  245          uint64_t expected_entries, actual_entries = 1;
 270  246  
      247 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 271  248          ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
 272  249          VERIFY3U(space_map_object(sm), !=, 0);
 273  250          dmu_buf_will_dirty(sm->sm_dbuf, tx);
 274  251  
 275  252          /*
 276  253           * This field is no longer necessary since the in-core space map
 277  254           * now contains the object number but is maintained for backwards
 278  255           * compatibility.
 279  256           */
 280  257          sm->sm_phys->smp_object = sm->sm_object;
↓ open down ↓ 29 lines elided ↑ open up ↑
 310  287                  start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
 311  288  
 312  289                  total += size << sm->sm_shift;
 313  290  
 314  291                  while (size != 0) {
 315  292                          uint64_t run_len;
 316  293  
 317  294                          run_len = MIN(size, SM_RUN_MAX);
 318  295  
 319  296                          if (entry == entry_map_end) {
      297 +                                mutex_exit(rt->rt_lock);
 320  298                                  dmu_write(os, space_map_object(sm),
 321  299                                      sm->sm_phys->smp_objsize, sm->sm_blksz,
 322  300                                      entry_map, tx);
      301 +                                mutex_enter(rt->rt_lock);
 323  302                                  sm->sm_phys->smp_objsize += sm->sm_blksz;
 324  303                                  entry = entry_map;
 325  304                          }
 326  305  
 327  306                          *entry++ = SM_OFFSET_ENCODE(start) |
 328  307                              SM_TYPE_ENCODE(maptype) |
 329  308                              SM_RUN_ENCODE(run_len);
 330  309  
 331  310                          start += run_len;
 332  311                          size -= run_len;
 333  312                          actual_entries++;
 334  313                  }
 335  314          }
 336  315  
 337  316          if (entry != entry_map) {
 338  317                  size = (entry - entry_map) * sizeof (uint64_t);
      318 +                mutex_exit(rt->rt_lock);
 339  319                  dmu_write(os, space_map_object(sm), sm->sm_phys->smp_objsize,
 340  320                      size, entry_map, tx);
      321 +                mutex_enter(rt->rt_lock);
 341  322                  sm->sm_phys->smp_objsize += size;
 342  323          }
 343  324          ASSERT3U(expected_entries, ==, actual_entries);
 344  325  
 345  326          /*
 346  327           * Ensure that the space_map's accounting wasn't changed
 347  328           * while we were in the middle of writing it out.
 348  329           */
 349  330          VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
 350  331          VERIFY3U(range_tree_space(rt), ==, rt_space);
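For reference, the *entry++ assignment above packs one run into a single word. A user-land sketch of that packing, under the same assumed pre-v2 layout as the decode sketch earlier (debug bit 63 is left clear for ordinary entries):

    #include <stdint.h>

    /* Assumed layout; mirrors SM_*_ENCODE for illustration only. */
    static uint64_t
    sm_entry_encode(uint64_t start, uint64_t run_len, int maptype)
    {
            return (((start & ((1ULL << 47) - 1)) << 16) |  /* SM_OFFSET_ENCODE */
                (((uint64_t)maptype & 1) << 15) |           /* SM_TYPE_ENCODE */
                ((run_len - 1) & 0x7fffULL));               /* SM_RUN_ENCODE */
    }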
↓ open down ↓ 12 lines elided ↑ open up ↑
 363  344          if (error)
 364  345                  return (error);
 365  346  
 366  347          dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks);
 367  348          sm->sm_phys = sm->sm_dbuf->db_data;
 368  349          return (0);
 369  350  }
 370  351  
 371  352  int
 372  353  space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
 373      -    uint64_t start, uint64_t size, uint8_t shift)
      354 +    uint64_t start, uint64_t size, uint8_t shift, kmutex_t *lp)
 374  355  {
 375  356          space_map_t *sm;
 376  357          int error;
 377  358  
 378  359          ASSERT(*smp == NULL);
 379  360          ASSERT(os != NULL);
 380  361          ASSERT(object != 0);
 381  362  
 382  363          sm = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
 383  364  
 384  365          sm->sm_start = start;
 385  366          sm->sm_size = size;
 386  367          sm->sm_shift = shift;
      368 +        sm->sm_lock = lp;
 387  369          sm->sm_os = os;
 388  370          sm->sm_object = object;
 389  371  
 390  372          error = space_map_open_impl(sm);
 391  373          if (error != 0) {
 392  374                  space_map_close(sm);
 393  375                  return (error);
 394  376          }
 395  377  
 396  378          *smp = sm;
↓ open down ↓ 68 lines elided ↑ open up ↑
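The new kmutex_t *lp argument above is stored as sm->sm_lock, which several paths in this change now assert is held. A hypothetical caller sketch (illustrative names, in the style of metaslab.c): the lock must be held across space_map_load() and space_map_update(), and space_map_load() may drop and re-take it internally around dmu_read():

    static int
    load_alloc_segments(space_map_t *sm, range_tree_t *rt)
    {
            int error;

            mutex_enter(sm->sm_lock);
            error = space_map_load(sm, rt, SM_ALLOC);
            if (error == 0)
                    space_map_update(sm);
            mutex_exit(sm->sm_lock);
            return (error);
    }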
 465  447  
 466  448  /*
 467  449   * Update the in-core space_map allocation and length values.
 468  450   */
 469  451  void
 470  452  space_map_update(space_map_t *sm)
 471  453  {
 472  454          if (sm == NULL)
 473  455                  return;
 474  456  
      457 +        ASSERT(MUTEX_HELD(sm->sm_lock));
      458 +
 475  459          sm->sm_alloc = sm->sm_phys->smp_alloc;
 476  460          sm->sm_length = sm->sm_phys->smp_objsize;
 477  461  }
 478  462  
 479  463  uint64_t
 480  464  space_map_alloc(objset_t *os, dmu_tx_t *tx)
 481  465  {
 482  466          spa_t *spa = dmu_objset_spa(os);
 483  467          uint64_t object;
 484  468          int bonuslen;
↓ open down ↓ 7 lines elided ↑ open up ↑
 492  476          }
 493  477  
 494  478          object = dmu_object_alloc(os,
 495  479              DMU_OT_SPACE_MAP, space_map_blksz,
 496  480              DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
 497  481  
 498  482          return (object);
 499  483  }
 500  484  
 501  485  void
 502      -space_map_free_obj(objset_t *os, uint64_t smobj, dmu_tx_t *tx)
      486 +space_map_free(space_map_t *sm, dmu_tx_t *tx)
 503  487  {
 504      -        spa_t *spa = dmu_objset_spa(os);
      488 +        spa_t *spa;
      489 +
      490 +        if (sm == NULL)
      491 +                return;
      492 +
      493 +        spa = dmu_objset_spa(sm->sm_os);
 505  494          if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
 506  495                  dmu_object_info_t doi;
 507  496  
 508      -                VERIFY0(dmu_object_info(os, smobj, &doi));
      497 +                dmu_object_info_from_db(sm->sm_dbuf, &doi);
 509  498                  if (doi.doi_bonus_size != SPACE_MAP_SIZE_V0) {
      499 +                        VERIFY(spa_feature_is_active(spa,
      500 +                            SPA_FEATURE_SPACEMAP_HISTOGRAM));
 510  501                          spa_feature_decr(spa,
 511  502                              SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
 512  503                  }
 513  504          }
 514  505  
 515      -        VERIFY0(dmu_object_free(os, smobj, tx));
 516      -}
 517      -
 518      -void
 519      -space_map_free(space_map_t *sm, dmu_tx_t *tx)
 520      -{
 521      -        if (sm == NULL)
 522      -                return;
 523      -
 524      -        space_map_free_obj(sm->sm_os, space_map_object(sm), tx);
      506 +        VERIFY3U(dmu_object_free(sm->sm_os, space_map_object(sm), tx), ==, 0);
 525  507          sm->sm_object = 0;
 526  508  }
 527  509  
 528  510  uint64_t
 529  511  space_map_object(space_map_t *sm)
 530  512  {
 531  513          return (sm != NULL ? sm->sm_object : 0);
 532  514  }
 533  515  
 534  516  /*
↓ open down ↓ 28 lines elided ↑ open up ↑