/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

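/*
 * A range tree tracks a set of integer ranges (e.g. free space) as
 * non-overlapping [start, end) segments in an AVL tree, maintaining a
 * histogram of segment sizes and a running total of tracked space.
 *
 * A minimal usage sketch (hypothetical caller; locking omitted):
 *
 *	range_tree_t *rt = range_tree_create(NULL, NULL);
 *	range_tree_add(rt, 0x1000, 0x2000);	/- track [0x1000, 0x3000)
 *	range_tree_remove(rt, 0x1800, 0x100);	/- splits the segment
 *	range_tree_vacate(rt, NULL, NULL);	/- empty the tree
 *	range_tree_destroy(rt);
 *
 * ("/-" above stands in for nested comment markers.)
 */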
kmem_cache_t *range_seg_cache;

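/*
 * One-time module setup: create the segment cache from which every
 * range_seg_t is allocated.  range_tree_fini() destroys it at teardown.
 */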
void
range_tree_init(void)
{
        ASSERT(range_seg_cache == NULL);
        range_seg_cache = kmem_cache_create("range_seg_cache",
            sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
        kmem_cache_destroy(range_seg_cache);
        range_seg_cache = NULL;
}

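/*
 * Recompute the size histogram from the live tree and verify that it
 * matches the incrementally maintained rt_histogram.
 */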
void
range_tree_stat_verify(range_tree_t *rt)
{
        range_seg_t *rs;
        uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
        int i;

        for (rs = avl_first(&rt->rt_root); rs != NULL;
            rs = AVL_NEXT(&rt->rt_root, rs)) {
                uint64_t size = rs->rs_end - rs->rs_start;
                int idx = highbit64(size) - 1;

                hist[idx]++;
                ASSERT3U(hist[idx], !=, 0);
        }

        for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
                if (hist[i] != rt->rt_histogram[i]) {
                        zfs_dbgmsg("i=%d, hist=%p, hist[i]=%llu, "
                            "rt_hist[i]=%llu", i, (void *)hist,
                            (u_longlong_t)hist[i],
                            (u_longlong_t)rt->rt_histogram[i]);
                }
                VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
        }
}

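/*
 * Histogram buckets are indexed by the highest set bit of the segment
 * size: a segment of size s is counted in bucket highbit64(s) - 1,
 * i.e. floor(log2(s)).
 */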
static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
        uint64_t size = rs->rs_end - rs->rs_start;
        int idx = highbit64(size) - 1;

        ASSERT(size != 0);
        ASSERT3U(idx, <,
            sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

        rt->rt_histogram[idx]++;
        ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
        uint64_t size = rs->rs_end - rs->rs_start;
        int idx = highbit64(size) - 1;

        ASSERT(size != 0);
        ASSERT3U(idx, <,
            sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

        ASSERT3U(rt->rt_histogram[idx], !=, 0);
        rt->rt_histogram[idx]--;
}

/*
 * NOTE: caller is responsible for all locking.
 *
 * Overlapping segments compare as equal, so an avl_find() on a search
 * segment returns a segment overlapping it, if one exists.
 */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
        const range_seg_t *r1 = x1;
        const range_seg_t *r2 = x2;

        if (r1->rs_start < r2->rs_start) {
                if (r1->rs_end > r2->rs_start)
                        return (0);
                return (-1);
        }
        if (r1->rs_start > r2->rs_start) {
                if (r1->rs_start < r2->rs_end)
                        return (0);
                return (1);
        }
        return (0);
}

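/*
 * Create an empty range tree.  If ops is non-NULL, its callbacks are
 * invoked to mirror every tree operation into an auxiliary structure
 * identified by arg.
 */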
range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg)
{
        range_tree_t *rt;

        rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

        avl_create(&rt->rt_root, range_tree_seg_compare,
            sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

        rt->rt_ops = ops;
        rt->rt_arg = arg;

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_create(rt, rt->rt_arg);

        return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
        VERIFY0(rt->rt_space);

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

        avl_destroy(&rt->rt_root);
        kmem_free(rt, sizeof (*rt));
}

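/*
 * Add the range [start, start + size) to the tree, merging with the
 * immediately preceding and/or following segment when the new range
 * abuts it.  Adding a range that overlaps an existing segment is a
 * double allocation and triggers zfs_panic_recover().
 */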
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
        range_tree_t *rt = arg;
        avl_index_t where;
        range_seg_t rsearch, *rs_before, *rs_after, *rs;
        uint64_t end = start + size;
        boolean_t merge_before, merge_after;

        VERIFY(size != 0);

        rsearch.rs_start = start;
        rsearch.rs_end = end;
        rs = avl_find(&rt->rt_root, &rsearch, &where);

        if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
                zfs_panic_recover("zfs: allocating allocated segment "
                    "(offset=%llu size=%llu)\n",
                    (longlong_t)start, (longlong_t)size);
                return;
        }

        /* Make sure we don't overlap with either of our neighbors */
        VERIFY3P(rs, ==, NULL);

        rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
        rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

        merge_before = (rs_before != NULL && rs_before->rs_end == start);
        merge_after = (rs_after != NULL && rs_after->rs_start == end);

        if (merge_before && merge_after) {
                avl_remove(&rt->rt_root, rs_before);
                if (rt->rt_ops != NULL) {
                        rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
                        rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
                }

                range_tree_stat_decr(rt, rs_before);
                range_tree_stat_decr(rt, rs_after);

                rs_after->rs_start = rs_before->rs_start;
                kmem_cache_free(range_seg_cache, rs_before);
                rs = rs_after;
        } else if (merge_before) {
                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

                range_tree_stat_decr(rt, rs_before);

                rs_before->rs_end = end;
                rs = rs_before;
        } else if (merge_after) {
                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

                range_tree_stat_decr(rt, rs_after);

                rs_after->rs_start = start;
                rs = rs_after;
        } else {
                rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
                rs->rs_start = start;
                rs->rs_end = end;
                avl_insert(&rt->rt_root, rs, where);
        }

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

        range_tree_stat_incr(rt, rs);
        rt->rt_space += size;
}

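/*
 * Remove the range [start, start + size) from the tree.  The range
 * must lie entirely within a single existing segment; removing the
 * middle of a segment splits it in two.
 */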
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
        range_tree_t *rt = arg;
        avl_index_t where;
        range_seg_t rsearch, *rs, *newseg;
        uint64_t end = start + size;
        boolean_t left_over, right_over;

        VERIFY3U(size, !=, 0);
        VERIFY3U(size, <=, rt->rt_space);

        rsearch.rs_start = start;
        rsearch.rs_end = end;
        rs = avl_find(&rt->rt_root, &rsearch, &where);

        /* Make sure we completely overlap with someone */
        if (rs == NULL) {
                zfs_panic_recover("zfs: freeing free segment "
                    "(offset=%llu size=%llu)",
                    (longlong_t)start, (longlong_t)size);
                return;
        }
        VERIFY3U(rs->rs_start, <=, start);
        VERIFY3U(rs->rs_end, >=, end);

        left_over = (rs->rs_start != start);
        right_over = (rs->rs_end != end);

        range_tree_stat_decr(rt, rs);

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

        if (left_over && right_over) {
                newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
                newseg->rs_start = end;
                newseg->rs_end = rs->rs_end;
                range_tree_stat_incr(rt, newseg);

                rs->rs_end = start;

                avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
        } else if (left_over) {
                rs->rs_end = start;
        } else if (right_over) {
                rs->rs_start = end;
        } else {
                avl_remove(&rt->rt_root, rs);
                kmem_cache_free(range_seg_cache, rs);
                rs = NULL;
        }

        if (rs != NULL) {
                range_tree_stat_incr(rt, rs);

                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
        }

        rt->rt_space -= size;
}

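/*
 * Return any segment that overlaps [start, start + size), or NULL if
 * none does.
 */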
static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
        range_seg_t rsearch;
        uint64_t end = start + size;

        VERIFY(size != 0);

        rsearch.rs_start = start;
        rsearch.rs_end = end;
        return (avl_find(&rt->rt_root, &rsearch, NULL));
}

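/*
 * Return the segment that fully contains [start, start + size), or
 * NULL if no single segment does.
 */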
static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
        range_seg_t *rs = range_tree_find_impl(rt, start, size);
        if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
                return (rs);
        return (NULL);
}

void
range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
{
        range_seg_t *rs = range_tree_find(rt, off, size);
        if (rs != NULL)
                panic("segment already in tree; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
        return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Remove any overlapping portions of this range from the tree, so that
 * afterward no part of [start, start + size) is in the tree.  It is
 * fine if the range, or parts of it, were never in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
        range_seg_t *rs;

        if (size == 0)
                return;

        while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
                uint64_t free_start = MAX(rs->rs_start, start);
                uint64_t free_end = MIN(rs->rs_end, start + size);
                range_tree_remove(rt, free_start, free_end - free_start);
        }
}

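/*
 * Exchange the contents of two range trees; the destination tree must
 * be empty.
 */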
void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
        range_tree_t *rt;

        ASSERT0(range_tree_space(*rtdst));
        ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

        rt = *rtsrc;
        *rtsrc = *rtdst;
        *rtdst = rt;
}

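/*
 * Empty the tree, invoking func(arg, start, size) on each segment
 * before it is freed.  func may be NULL.
 */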
void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
        range_seg_t *rs;
        void *cookie = NULL;

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

        while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
                if (func != NULL)
                        func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
                kmem_cache_free(range_seg_cache, rs);
        }

        bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
        rt->rt_space = 0;
}

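/*
 * Invoke func(arg, start, size) on every segment in the tree, in
 * ascending offset order.
 */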
void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
        range_seg_t *rs;

        for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
                func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

uint64_t
range_tree_space(range_tree_t *rt)
{
        return (rt->rt_space);
}

boolean_t
range_tree_is_empty(range_tree_t *rt)
{
        ASSERT(rt != NULL);
        return (range_tree_space(rt) == 0);
}

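/*
 * range_tree_min()/range_tree_max() return the lowest and highest
 * offsets covered by the tree, or 0 if the tree is empty;
 * range_tree_span() is the distance between them.
 */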
uint64_t
range_tree_min(range_tree_t *rt)
{
        range_seg_t *rs = avl_first(&rt->rt_root);
        return (rs != NULL ? rs->rs_start : 0);
}

uint64_t
range_tree_max(range_tree_t *rt)
{
        range_seg_t *rs = avl_last(&rt->rt_root);
        return (rs != NULL ? rs->rs_end : 0);
}

uint64_t
range_tree_span(range_tree_t *rt)
{
        return (range_tree_max(rt) - range_tree_min(rt));
}