NEX-10069 ZFS_READONLY is a little too strict (fix test lint)
NEX-9553 Move ss_fill gap logic from scan algorithm into range_tree.c
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-9908 System crashes when a user starts scrub for a pool with enabled WBC
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-9562 Attaching a vdev while resilver/scrub is running causes panic.
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-6088 ZFS scrub/resilver take excessively long due to issuing lots of random IO
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h

          --- old/usr/src/uts/common/fs/zfs/range_tree.c
          +++ new/usr/src/uts/common/fs/zfs/range_tree.c
(15 lines elided)
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  /*
  26      - * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
       26 + * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  27   27   */
  28   28  
  29   29  #include <sys/zfs_context.h>
  30   30  #include <sys/spa.h>
  31   31  #include <sys/dmu.h>
  32   32  #include <sys/dnode.h>
  33   33  #include <sys/zio.h>
  34   34  #include <sys/range_tree.h>
  35   35  
       36 +/*
       37 + * Range trees are tree-based data structures that can be used to
       38 + * track free space or generally any space allocation information.
       39 + * A range tree keeps track of individual segments and automatically
       40 + * provides facilities such as adjacent extent merging and extent
       41 + * splitting in response to range add/remove requests.
       42 + *
       43 + * A range tree starts out completely empty, with no segments in it.
        44 + * Adding an allocation via range_tree_add to the range tree can:
       45 + * 1) create a new extent
       46 + * 2) extend an adjacent extent
       47 + * 3) merge two adjacent extents
        48 + * Conversely, removing an allocation via range_tree_remove can:
       49 + * 1) completely remove an extent
       50 + * 2) shorten an extent (if the allocation was near one of its ends)
       51 + * 3) split an extent into two extents, in effect punching a hole
       52 + *
       53 + * A range tree is also capable of 'bridging' gaps when adding
       54 + * allocations. This is useful for cases when close proximity of
       55 + * allocations is an important detail that needs to be represented
       56 + * in the range tree. See range_tree_set_gap(). The default behavior
       57 + * is not to bridge gaps (i.e. the maximum allowed gap size is 0).
       58 + *
       59 + * In order to traverse a range tree, use either the range_tree_walk
       60 + * or range_tree_vacate functions.
       61 + *
       62 + * To obtain more accurate information on individual segment
       63 + * operations that the range tree performs "under the hood", you can
       64 + * specify a set of callbacks by passing a range_tree_ops_t structure
       65 + * to the range_tree_create and range_tree_create_custom functions.
       66 + * Any callbacks that are non-NULL are then called at the appropriate
       67 + * times.
       68 + *
       69 + * It is possible to store additional custom information with each
       70 + * segment. This is typically useful when using one or more of the
       71 + * operation callbacks mentioned above. To do so, use the
       72 + * range_tree_create_custom function to create your range tree and
       73 + * pass a custom kmem cache allocator. This allocator must allocate
       74 + * objects of at least sizeof (range_seg_t). The range tree will use
       75 + * the start of that object as a range_seg_t to keep its internal
       76 + * data structures and you can use the remainder of the object to
       77 + * store arbitrary additional fields as necessary.
       78 + */
       79 +
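As a rough illustration of the interface described above (not part of this changeset), the sketch below creates a range tree protected by a caller-owned mutex, adds and removes a few segments, and walks the result. The range_tree_* and mutex_* calls are the ones defined in this file and in the kernel; the two wrapper functions are hypothetical.

static void
range_tree_print_seg(void *arg, uint64_t start, uint64_t size)
{
        zfs_dbgmsg("seg: start=%llu size=%llu",
            (longlong_t)start, (longlong_t)size);
}

static void
range_tree_usage_sketch(void)
{
        kmutex_t lock;
        range_tree_t *rt;

        mutex_init(&lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_enter(&lock);

        /* No operation callbacks, caller-provided lock. */
        rt = range_tree_create(NULL, NULL, &lock);

        range_tree_add(rt, 0, 100);     /* creates [0, 100) */
        range_tree_add(rt, 100, 50);    /* extends it to [0, 150) */
        range_tree_remove(rt, 50, 10);  /* splits it: [0, 50) and [60, 150) */

        range_tree_walk(rt, range_tree_print_seg, NULL);

        range_tree_vacate(rt, NULL, NULL);
        range_tree_destroy(rt);
        mutex_exit(&lock);
        mutex_destroy(&lock);
}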
  36   80  kmem_cache_t *range_seg_cache;
  37   81  
  38   82  void
  39   83  range_tree_init(void)
  40   84  {
  41   85          ASSERT(range_seg_cache == NULL);
  42   86          range_seg_cache = kmem_cache_create("range_seg_cache",
  43   87              sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
  44   88  }
  45   89  
(22 lines elided)
  68  112  
  69  113          for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
  70  114                  if (hist[i] != rt->rt_histogram[i]) {
  71  115                          zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
  72  116                              i, hist, hist[i], rt->rt_histogram[i]);
  73  117                  }
  74  118                  VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
  75  119          }
  76  120  }
  77  121  
      122 +/*
       123 + * Sets the inter-segment gap. When a segment is subsequently added, any
       124 + * adjacent segment that is less than or equal to `gap' apart is joined
       125 + * with it, in effect 'bridging' the gap.
      126 + */
      127 +void
      128 +range_tree_set_gap(range_tree_t *rt, uint64_t gap)
      129 +{
      130 +        ASSERT(MUTEX_HELD(rt->rt_lock));
      131 +        rt->rt_gap = gap;
      132 +}
      133 +
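As a worked example (illustration only, continuing the sketch above with the same rt and lock): with a 16-byte gap configured, two segments added 10 bytes apart are bridged into a single segment, and the bridged bytes are counted in rt_space.

        range_tree_set_gap(rt, 16);
        range_tree_add(rt, 0, 32);      /* creates [0, 32) */
        range_tree_add(rt, 42, 32);     /* 10-byte gap <= 16, bridged to [0, 74) */
        /* range_tree_space(rt) now reports 74, including the 10 bridged bytes. */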
      134 +/*
      135 + * Changes out the lock used by the range tree. Useful when you are moving
      136 + * the range tree between containing structures without having to recreate
      137 + * it. Both the old and new locks must be held by the caller.
      138 + */
      139 +void
      140 +range_tree_set_lock(range_tree_t *rt, kmutex_t *lp)
      141 +{
      142 +        ASSERT(MUTEX_HELD(rt->rt_lock) && MUTEX_HELD(lp));
      143 +        rt->rt_lock = lp;
      144 +}
      145 +
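A sketch of the intended use, assuming two hypothetical parent structures that each embed a lock and own the tree at different times; both locks are held across the handoff, as the assertion requires.

        mutex_enter(&old_parent->p_lock);
        mutex_enter(&new_parent->p_lock);

        /* rt currently uses old_parent->p_lock; hand it to the new owner. */
        range_tree_set_lock(rt, &new_parent->p_lock);
        new_parent->p_tree = rt;
        old_parent->p_tree = NULL;

        mutex_exit(&new_parent->p_lock);
        mutex_exit(&old_parent->p_lock);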
  78  146  static void
  79  147  range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
  80  148  {
  81  149          uint64_t size = rs->rs_end - rs->rs_start;
  82  150          int idx = highbit64(size) - 1;
  83  151  
  84  152          ASSERT(size != 0);
  85  153          ASSERT3U(idx, <,
  86  154              sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
  87  155  
      156 +        ASSERT(MUTEX_HELD(rt->rt_lock));
  88  157          rt->rt_histogram[idx]++;
  89  158          ASSERT3U(rt->rt_histogram[idx], !=, 0);
  90  159  }
  91  160  
  92  161  static void
  93  162  range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
  94  163  {
  95  164          uint64_t size = rs->rs_end - rs->rs_start;
  96  165          int idx = highbit64(size) - 1;
  97  166  
  98  167          ASSERT(size != 0);
  99  168          ASSERT3U(idx, <,
 100  169              sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
 101  170  
      171 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 102  172          ASSERT3U(rt->rt_histogram[idx], !=, 0);
 103  173          rt->rt_histogram[idx]--;
 104  174  }
 105  175  
 106  176  /*
 107  177   * NOTE: caller is responsible for all locking.
 108  178   */
 109  179  static int
 110  180  range_tree_seg_compare(const void *x1, const void *x2)
 111  181  {
(7 lines elided)
 119  189          }
 120  190          if (r1->rs_start > r2->rs_start) {
 121  191                  if (r1->rs_start < r2->rs_end)
 122  192                          return (0);
 123  193                  return (1);
 124  194          }
 125  195          return (0);
 126  196  }
 127  197  
 128  198  range_tree_t *
 129      -range_tree_create(range_tree_ops_t *ops, void *arg)
      199 +range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp)
 130  200  {
 131      -        range_tree_t *rt;
      201 +        range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
 132  202  
 133      -        rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
 134      -
 135  203          avl_create(&rt->rt_root, range_tree_seg_compare,
 136  204              sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
 137  205  
      206 +        rt->rt_lock = lp;
 138  207          rt->rt_ops = ops;
 139  208          rt->rt_arg = arg;
 140  209  
 141      -        if (rt->rt_ops != NULL)
      210 +        if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
 142  211                  rt->rt_ops->rtop_create(rt, rt->rt_arg);
 143  212  
 144  213          return (rt);
 145  214  }
 146  215  
 147  216  void
 148  217  range_tree_destroy(range_tree_t *rt)
 149  218  {
 150  219          VERIFY0(rt->rt_space);
 151  220  
 152      -        if (rt->rt_ops != NULL)
      221 +        if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
 153  222                  rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
 154  223  
 155  224          avl_destroy(&rt->rt_root);
 156  225          kmem_free(rt, sizeof (*rt));
 157  226  }
 158  227  
 159  228  void
 160      -range_tree_add(void *arg, uint64_t start, uint64_t size)
      229 +range_tree_add_fill(void *arg, uint64_t start, uint64_t size, uint64_t fill)
 161  230  {
 162  231          range_tree_t *rt = arg;
 163  232          avl_index_t where;
 164  233          range_seg_t rsearch, *rs_before, *rs_after, *rs;
 165      -        uint64_t end = start + size;
      234 +        uint64_t end = start + size, gap_extra = 0;
 166  235          boolean_t merge_before, merge_after;
 167  236  
      237 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 168  238          VERIFY(size != 0);
 169  239  
 170  240          rsearch.rs_start = start;
 171  241          rsearch.rs_end = end;
 172  242          rs = avl_find(&rt->rt_root, &rsearch, &where);
 173  243  
 174      -        if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
      244 +        if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end &&
      245 +            rt->rt_gap == 0) {
 175  246                  zfs_panic_recover("zfs: allocating allocated segment"
 176  247                      "(offset=%llu size=%llu)\n",
 177  248                      (longlong_t)start, (longlong_t)size);
 178  249                  return;
 179  250          }
 180  251  
 181      -        /* Make sure we don't overlap with either of our neighbors */
 182      -        VERIFY(rs == NULL);
      252 +        if (rt->rt_gap != 0) {
      253 +                if (rs != NULL) {
      254 +                        if (rs->rs_start <= start && rs->rs_end >= end) {
      255 +                                if (rt->rt_ops != NULL &&
      256 +                                    rt->rt_ops->rtop_remove != NULL)
      257 +                                        rt->rt_ops->rtop_remove(rt, rs,
      258 +                                            rt->rt_arg);
      259 +                                rs->rs_fill += fill;
      260 +                                if (rt->rt_ops != NULL &&
      261 +                                    rt->rt_ops->rtop_add != NULL) {
      262 +                                        rt->rt_ops->rtop_add(rt, rs,
      263 +                                            rt->rt_arg);
      264 +                                }
      265 +                                return;
      266 +                        }
      267 +                        ASSERT0(fill);
      268 +                        if (rs->rs_start < start) {
      269 +                                ASSERT3U(end, >, rs->rs_end);
      270 +                                range_tree_add(rt, rs->rs_end, end -
      271 +                                    rs->rs_end);
      272 +                        } else {
      273 +                                ASSERT3U(rs->rs_start, >, start);
      274 +                                range_tree_add(rt, start, rs->rs_start - start);
      275 +                        }
      276 +                        return;
      277 +                }
      278 +        } else {
      279 +                /* Make sure we don't overlap with either of our neighbors */
      280 +                VERIFY(rs == NULL);
      281 +        }
 183  282  
 184  283          rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
 185  284          rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
 186  285  
 187      -        merge_before = (rs_before != NULL && rs_before->rs_end == start);
 188      -        merge_after = (rs_after != NULL && rs_after->rs_start == end);
      286 +        merge_before = (rs_before != NULL &&
      287 +            start - rs_before->rs_end <= rt->rt_gap);
      288 +        merge_after = (rs_after != NULL &&
      289 +            rs_after->rs_start - end <= rt->rt_gap);
      290 +        if (rt->rt_gap != 0) {
      291 +                if (merge_before)
      292 +                        gap_extra += start - rs_before->rs_end;
      293 +                if (merge_after)
      294 +                        gap_extra += rs_after->rs_start - end;
      295 +        }
 189  296  
 190  297          if (merge_before && merge_after) {
 191  298                  avl_remove(&rt->rt_root, rs_before);
 192      -                if (rt->rt_ops != NULL) {
      299 +                if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
 193  300                          rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
 194  301                          rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
 195  302                  }
 196  303  
 197  304                  range_tree_stat_decr(rt, rs_before);
 198  305                  range_tree_stat_decr(rt, rs_after);
 199  306  
 200  307                  rs_after->rs_start = rs_before->rs_start;
      308 +                rs_after->rs_fill += rs_before->rs_fill + fill;
 201  309                  kmem_cache_free(range_seg_cache, rs_before);
 202  310                  rs = rs_after;
 203  311          } else if (merge_before) {
 204      -                if (rt->rt_ops != NULL)
      312 +                if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
 205  313                          rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
 206  314  
 207  315                  range_tree_stat_decr(rt, rs_before);
 208  316  
 209  317                  rs_before->rs_end = end;
      318 +                rs_before->rs_fill += fill;
 210  319                  rs = rs_before;
 211  320          } else if (merge_after) {
 212      -                if (rt->rt_ops != NULL)
      321 +                if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
 213  322                          rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
 214  323  
 215  324                  range_tree_stat_decr(rt, rs_after);
 216  325  
 217  326                  rs_after->rs_start = start;
      327 +                rs_after->rs_fill += fill;
 218  328                  rs = rs_after;
 219  329          } else {
 220  330                  rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
 221  331                  rs->rs_start = start;
 222  332                  rs->rs_end = end;
      333 +                rs->rs_fill = fill;
 223  334                  avl_insert(&rt->rt_root, rs, where);
 224  335          }
 225  336  
 226      -        if (rt->rt_ops != NULL)
      337 +        if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
 227  338                  rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
 228  339  
 229  340          range_tree_stat_incr(rt, rs);
 230      -        rt->rt_space += size;
      341 +        rt->rt_space += size + gap_extra;
 231  342  }
 232  343  
 233  344  void
 234      -range_tree_remove(void *arg, uint64_t start, uint64_t size)
      345 +range_tree_add(void *arg, uint64_t start, uint64_t size)
 235  346  {
      347 +        range_tree_add_fill(arg, start, size, 0);
      348 +}
      349 +
      350 +static void
      351 +range_tree_remove_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill,
      352 +    uint64_t fill_left, boolean_t partial_overlap)
      353 +{
 236  354          range_tree_t *rt = arg;
 237  355          avl_index_t where;
 238  356          range_seg_t rsearch, *rs, *newseg;
 239  357          uint64_t end = start + size;
 240  358          boolean_t left_over, right_over;
 241  359  
      360 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 242  361          VERIFY3U(size, !=, 0);
 243      -        VERIFY3U(size, <=, rt->rt_space);
      362 +        if (!partial_overlap) {
      363 +                VERIFY3U(size, <=, rt->rt_space);
      364 +        } else {
      365 +                VERIFY0(fill);
      366 +                VERIFY0(fill_left);
      367 +        }
 244  368  
 245  369          rsearch.rs_start = start;
 246  370          rsearch.rs_end = end;
 247      -        rs = avl_find(&rt->rt_root, &rsearch, &where);
 248  371  
 249      -        /* Make sure we completely overlap with someone */
 250      -        if (rs == NULL) {
 251      -                zfs_panic_recover("zfs: freeing free segment "
 252      -                    "(offset=%llu size=%llu)",
 253      -                    (longlong_t)start, (longlong_t)size);
 254      -                return;
 255      -        }
 256      -        VERIFY3U(rs->rs_start, <=, start);
 257      -        VERIFY3U(rs->rs_end, >=, end);
      372 +        while ((rs = avl_find(&rt->rt_root, &rsearch, &where)) != NULL ||
      373 +            !partial_overlap) {
      374 +                uint64_t overlap_sz;
 258  375  
 259      -        left_over = (rs->rs_start != start);
 260      -        right_over = (rs->rs_end != end);
      376 +                if (partial_overlap) {
      377 +                        if (rs->rs_start <= start && rs->rs_end >= end)
      378 +                                overlap_sz = size;
      379 +                        else if (rs->rs_start > start && rs->rs_end < end)
      380 +                                overlap_sz = rs->rs_end - rs->rs_start;
      381 +                        else if (rs->rs_end < end)
      382 +                                overlap_sz = rs->rs_end - start;
      383 +                        else    /* rs->rs_start > start */
      384 +                                overlap_sz = end - rs->rs_start;
      385 +                } else {
      386 +                        /* Make sure we completely overlapped with someone */
      387 +                        if (rs == NULL) {
      388 +                                zfs_panic_recover("zfs: freeing free segment "
      389 +                                    "(offset=%llu size=%llu)",
      390 +                                    (longlong_t)start, (longlong_t)size);
      391 +                                return;
      392 +                        }
      393 +                        VERIFY3U(rs->rs_start, <=, start);
      394 +                        VERIFY3U(rs->rs_end, >=, end);
      395 +                        overlap_sz = size;
      396 +                }
 261  397  
 262      -        range_tree_stat_decr(rt, rs);
      398 +                left_over = (rs->rs_start < start);
      399 +                right_over = (rs->rs_end > end);
 263  400  
 264      -        if (rt->rt_ops != NULL)
 265      -                rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
      401 +                range_tree_stat_decr(rt, rs);
 266  402  
 267      -        if (left_over && right_over) {
 268      -                newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
 269      -                newseg->rs_start = end;
 270      -                newseg->rs_end = rs->rs_end;
 271      -                range_tree_stat_incr(rt, newseg);
      403 +                if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
      404 +                        rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
 272  405  
 273      -                rs->rs_end = start;
      406 +                if (left_over && right_over) {
      407 +                        newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
      408 +                        newseg->rs_start = end;
      409 +                        newseg->rs_end = rs->rs_end;
      410 +                        ASSERT3U(rs->rs_fill, >=, (fill + fill_left));
      411 +                        newseg->rs_fill = rs->rs_fill - (fill + fill_left);
      412 +                        range_tree_stat_incr(rt, newseg);
 274  413  
 275      -                avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
 276      -                if (rt->rt_ops != NULL)
 277      -                        rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
 278      -        } else if (left_over) {
 279      -                rs->rs_end = start;
 280      -        } else if (right_over) {
 281      -                rs->rs_start = end;
 282      -        } else {
 283      -                avl_remove(&rt->rt_root, rs);
 284      -                kmem_cache_free(range_seg_cache, rs);
 285      -                rs = NULL;
 286      -        }
      414 +                        rs->rs_end = start;
      415 +                        rs->rs_fill = fill_left;
 287  416  
 288      -        if (rs != NULL) {
 289      -                range_tree_stat_incr(rt, rs);
      417 +                        avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
      418 +                        if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
      419 +                                rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
      420 +                } else if (left_over) {
      421 +                        rs->rs_end = start;
      422 +                        ASSERT3U(rs->rs_fill, >=, fill);
      423 +                        rs->rs_fill -= fill;
      424 +                } else if (right_over) {
      425 +                        rs->rs_start = end;
      426 +                        ASSERT3U(rs->rs_fill, >=, fill);
      427 +                        rs->rs_fill -= fill;
      428 +                } else {
      429 +                        ASSERT3U(rs->rs_fill, ==, fill);
      430 +                        ASSERT(fill == 0 || !partial_overlap);
      431 +                        avl_remove(&rt->rt_root, rs);
      432 +                        kmem_cache_free(range_seg_cache, rs);
      433 +                        rs = NULL;
      434 +                }
 290  435  
 291      -                if (rt->rt_ops != NULL)
 292      -                        rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
      436 +                if (rs != NULL) {
      437 +                        range_tree_stat_incr(rt, rs);
      438 +
      439 +                        if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
      440 +                                rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
      441 +                }
      442 +
      443 +                rt->rt_space -= overlap_sz;
      444 +                if (!partial_overlap) {
      445 +                        /*
      446 +                         * There can't be any more segments overlapping with
      447 +                         * us, so no sense in performing an extra search.
      448 +                         */
      449 +                        break;
      450 +                }
 293  451          }
      452 +}
 294  453  
 295      -        rt->rt_space -= size;
      454 +void
      455 +range_tree_remove(void *arg, uint64_t start, uint64_t size)
      456 +{
      457 +        range_tree_remove_impl(arg, start, size, 0, 0, B_FALSE);
 296  458  }
 297  459  
      460 +void
      461 +range_tree_remove_overlap(void *arg, uint64_t start, uint64_t size)
      462 +{
      463 +        range_tree_remove_impl(arg, start, size, 0, 0, B_TRUE);
      464 +}
      465 +
      466 +void
      467 +range_tree_remove_fill(void *arg, uint64_t start, uint64_t size,
      468 +    uint64_t fill, uint64_t remain_left)
      469 +{
      470 +        range_tree_remove_impl(arg, start, size, fill, remain_left, B_FALSE);
      471 +}
      472 +
 298  473  static range_seg_t *
 299  474  range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
 300  475  {
 301  476          avl_index_t where;
 302  477          range_seg_t rsearch;
 303  478          uint64_t end = start + size;
 304  479  
      480 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 305  481          VERIFY(size != 0);
 306  482  
 307  483          rsearch.rs_start = start;
 308  484          rsearch.rs_end = end;
 309  485          return (avl_find(&rt->rt_root, &rsearch, &where));
 310  486  }
 311  487  
 312      -static range_seg_t *
      488 +void *
 313  489  range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
 314  490  {
 315  491          range_seg_t *rs = range_tree_find_impl(rt, start, size);
 316  492          if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
 317  493                  return (rs);
 318  494          return (NULL);
 319  495  }
 320  496  
      497 +/*
       498 + * Given an extent start offset and size, looks through the provided range
       499 + * tree and finds a suitable start offset (at or after `start') such that
       500 + * the requested extent _doesn't_ overlap with any range segment in the
       501 + * range tree.
      502 + */
      503 +uint64_t
      504 +range_tree_find_gap(range_tree_t *rt, uint64_t start, uint64_t size)
      505 +{
      506 +        range_seg_t *rs;
      507 +        while ((rs = range_tree_find_impl(rt, start, size)) != NULL)
      508 +                start = rs->rs_end;
      509 +        return (start);
      510 +}
      511 +
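A small worked example (not part of the diff): if the tree holds the segments [0, 100) and [120, 200), asking where a 64-byte extent could be placed starting from offset 50 walks past both segments.

        uint64_t off = range_tree_find_gap(rt, 50, 64);
        /*
         * [50, 114) overlaps [0, 100), so the search moves to 100;
         * [100, 164) overlaps [120, 200), so it moves to 200; [200, 264)
         * overlaps nothing, so off == 200.
         */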
 321  512  void
 322  513  range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
 323  514  {
 324  515          range_seg_t *rs;
 325  516  
      517 +        mutex_enter(rt->rt_lock);
 326  518          rs = range_tree_find(rt, off, size);
 327  519          if (rs != NULL)
 328  520                  panic("freeing free block; rs=%p", (void *)rs);
      521 +        mutex_exit(rt->rt_lock);
 329  522  }
 330  523  
 331  524  boolean_t
 332  525  range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
 333  526  {
 334  527          return (range_tree_find(rt, start, size) != NULL);
 335  528  }
 336  529  
 337  530  /*
 338  531   * Ensure that this range is not in the tree, regardless of whether
 339  532   * it is currently in the tree.
 340  533   */
 341  534  void
 342  535  range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
 343  536  {
 344  537          range_seg_t *rs;
 345  538  
 346      -        if (size == 0)
 347      -                return;
 348      -
 349  539          while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
 350  540                  uint64_t free_start = MAX(rs->rs_start, start);
 351  541                  uint64_t free_end = MIN(rs->rs_end, start + size);
 352  542                  range_tree_remove(rt, free_start, free_end - free_start);
 353  543          }
 354  544  }
 355  545  
 356  546  void
 357  547  range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
 358  548  {
 359  549          range_tree_t *rt;
 360  550  
      551 +        ASSERT(MUTEX_HELD((*rtsrc)->rt_lock));
 361  552          ASSERT0(range_tree_space(*rtdst));
 362  553          ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
 363  554  
 364  555          rt = *rtsrc;
 365  556          *rtsrc = *rtdst;
 366  557          *rtdst = rt;
 367  558  }
 368  559  
 369  560  void
 370  561  range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
 371  562  {
 372  563          range_seg_t *rs;
 373  564          void *cookie = NULL;
 374  565  
      566 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 375  567  
 376      -        if (rt->rt_ops != NULL)
      568 +        if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
 377  569                  rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
 378  570  
 379  571          while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
 380  572                  if (func != NULL)
 381  573                          func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
 382  574                  kmem_cache_free(range_seg_cache, rs);
 383  575          }
 384  576  
 385  577          bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
 386  578          rt->rt_space = 0;
 387  579  }
 388  580  
 389  581  void
 390  582  range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
 391  583  {
 392  584          range_seg_t *rs;
 393  585  
      586 +        ASSERT(MUTEX_HELD(rt->rt_lock));
 394  587  
 395  588          for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
 396  589                  func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
 397  590  }
 398  591  
      592 +range_seg_t *
      593 +range_tree_first(range_tree_t *rt)
      594 +{
      595 +        ASSERT(MUTEX_HELD(rt->rt_lock));
      596 +        return (avl_first(&rt->rt_root));
      597 +}
      598 +
 399  599  uint64_t
 400  600  range_tree_space(range_tree_t *rt)
 401  601  {
 402  602          return (rt->rt_space);
 403  603  }
    