/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

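/*
 * A range tree tracks a set of non-overlapping 64-bit integer ranges
 * (segments).  Each segment [rs_start, rs_end) is a node in an AVL
 * tree sorted by offset; segments that become adjacent are merged on
 * insertion and split on removal, so the tree always holds the
 * minimal number of segments.  A per-tree histogram counts segments
 * by power-of-two size class, and rt_space tracks the total size.
 *
 * A minimal usage sketch (no ops callbacks; per the note below, the
 * caller is responsible for all locking):
 *
 *	range_tree_t *rt = range_tree_create(NULL, NULL);
 *	range_tree_add(rt, 0x1000, 0x2000);	   rt holds [0x1000, 0x3000)
 *	range_tree_remove(rt, 0x2000, 0x1000);	   rt holds [0x1000, 0x2000)
 *	range_tree_vacate(rt, NULL, NULL);	   rt is now empty
 *	range_tree_destroy(rt);
 */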
kmem_cache_t *range_seg_cache;

void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}

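/*
 * Recompute the size histogram from the segments in the tree and
 * verify that it matches the histogram maintained incrementally in
 * rt->rt_histogram.
 */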
void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist[i]=%llu, "
			    "rt_hist[i]=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

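/*
 * range_tree_stat_incr() and range_tree_stat_decr() maintain
 * rt_histogram as segments come and go: a segment of size S is
 * counted in bucket highbit64(S) - 1, i.e. by the index of its
 * highest set bit (floor(log2(S))).
 */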
static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

/*
 * Compare two segments for AVL ordering; overlapping segments compare
 * equal, so avl_find() matches any segment that overlaps the search
 * range.  NOTE: caller is responsible for all locking.
 */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = x1;
	const range_seg_t *r2 = x2;

	if (r1->rs_start < r2->rs_start) {
		if (r1->rs_end > r2->rs_start)
			return (0);
		return (-1);
	}
	if (r1->rs_start > r2->rs_start) {
		if (r1->rs_start < r2->rs_end)
			return (0);
		return (1);
	}
	return (0);
}

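/*
 * Create an empty range tree.  The ops vector, if non-NULL, provides
 * callbacks (rtop_create/destroy/add/remove/vacate) that mirror every
 * change to the tree; 'arg' is passed through to each callback.
 */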
range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg)
{
	range_tree_t *rt;

	rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_ops = ops;
	rt->rt_arg = arg;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

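/*
 * Add the range [start, start + size) to the tree.  The range must not
 * overlap anything already in the tree; if it abuts an existing
 * segment on either side, the segments are merged.  Four cases: merge
 * with both neighbors (collapsing two segments into one), merge with
 * the previous segment only, merge with the next segment only, or
 * insert a freshly allocated segment.
 */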
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY(rs == NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		/*
		 * The new range bridges rs_before and rs_after; extend
		 * rs_after to cover all three pieces and free rs_before.
		 */
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		/* Extend rs_before rightward over the new range. */
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		/* Extend rs_after leftward over the new range. */
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		/* No neighbor to merge with; insert a new segment. */
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}

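/*
 * Remove the range [start, start + size) from the tree.  The range
 * must be fully contained in a single existing segment.  Removal may
 * shrink that segment from the left or right, split it in two (which
 * allocates a new segment for the right-hand remainder), or free it
 * outright if the segment is consumed exactly.
 */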
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with an existing segment. */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}
	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		/*
		 * The removal splits rs in two: rs keeps the left-hand
		 * piece and newseg takes the right-hand piece.
		 */
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		/* Trim the right end of rs. */
		rs->rs_end = start;
	} else if (right_over) {
		/* Trim the left end of rs. */
		rs->rs_start = end;
	} else {
		/* The segment is consumed exactly; free it. */
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}

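/*
 * Return any segment that overlaps the range [start, start + size),
 * or NULL if there is no overlap.
 */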
static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	avl_index_t where;
	range_seg_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, &where));
}

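/*
 * Like range_tree_find_impl(), but only return the segment if it
 * contains the range [start, start + size) in its entirety.
 */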
static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}

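/*
 * Panic if the range [off, off + size) is present in the tree; used
 * to catch attempts to free an already-free block.
 */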
void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs;

	rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("freeing free block; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Remove any part of the range [start, start + size) that is present
 * in the tree.  Unlike range_tree_remove(), the range may be absent
 * from the tree or only partially covered by it; afterwards, none of
 * it remains in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	if (size == 0)
		return;

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs->rs_start, start);
		uint64_t free_end = MIN(rs->rs_end, start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

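/*
 * Swap the trees pointed to by *rtsrc and *rtdst.  The destination
 * tree must be empty.
 */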
void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}

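/*
 * Remove every segment from the tree, invoking func(arg, start, size)
 * on each one first if func is non-NULL.
 */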
void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;
	void *cookie = NULL;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
		if (func != NULL)
			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
		kmem_cache_free(range_seg_cache, rs);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

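/*
 * Invoke func(arg, start, size) on every segment in the tree without
 * modifying it.
 */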
void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;

	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

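/*
 * Return the total amount of space (the sum of all segment sizes)
 * tracked by the tree.
 */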
uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}