1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
27 */
28
29 #include <sys/zfs_context.h>
30 #include <sys/spa.h>
31 #include <sys/dmu.h>
32 #include <sys/dnode.h>
33 #include <sys/zio.h>
34 #include <sys/range_tree.h>
35
36 kmem_cache_t *range_seg_cache;
37
38 void
39 range_tree_init(void)
40 {
41 ASSERT(range_seg_cache == NULL);
42 range_seg_cache = kmem_cache_create("range_seg_cache",
43 sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
44 }
45
46 void
47 range_tree_fini(void)
48 {
49 kmem_cache_destroy(range_seg_cache);
50 range_seg_cache = NULL;
51 }
52
53 void
54 range_tree_stat_verify(range_tree_t *rt)
55 {
56 range_seg_t *rs;
57 uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
58 int i;
59
60 for (rs = avl_first(&rt->rt_root); rs != NULL;
61 rs = AVL_NEXT(&rt->rt_root, rs)) {
62 uint64_t size = rs->rs_end - rs->rs_start;
63 int idx = highbit64(size) - 1;
64
65 hist[idx]++;
66 ASSERT3U(hist[idx], !=, 0);
67 }
68
69 for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
70 if (hist[i] != rt->rt_histogram[i]) {
71 zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
72 i, hist, hist[i], rt->rt_histogram[i]);
73 }
74 VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
75 }
76 }
77
78 static void
79 range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
80 {
81 uint64_t size = rs->rs_end - rs->rs_start;
82 int idx = highbit64(size) - 1;
83
84 ASSERT(size != 0);
85 ASSERT3U(idx, <,
86 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
87
88 rt->rt_histogram[idx]++;
89 ASSERT3U(rt->rt_histogram[idx], !=, 0);
90 }
91
92 static void
93 range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
94 {
95 uint64_t size = rs->rs_end - rs->rs_start;
96 int idx = highbit64(size) - 1;
97
98 ASSERT(size != 0);
99 ASSERT3U(idx, <,
100 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
101
102 ASSERT3U(rt->rt_histogram[idx], !=, 0);
103 rt->rt_histogram[idx]--;
104 }
105
106 /*
107 * NOTE: caller is responsible for all locking.
108 */
109 static int
110 range_tree_seg_compare(const void *x1, const void *x2)
111 {
112 const range_seg_t *r1 = x1;
113 const range_seg_t *r2 = x2;
114
115 if (r1->rs_start < r2->rs_start) {
116 if (r1->rs_end > r2->rs_start)
117 return (0);
118 return (-1);
119 }
120 if (r1->rs_start > r2->rs_start) {
121 if (r1->rs_start < r2->rs_end)
122 return (0);
123 return (1);
124 }
125 return (0);
126 }
127
128 range_tree_t *
129 range_tree_create(range_tree_ops_t *ops, void *arg)
130 {
131 range_tree_t *rt;
132
133 rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
134
135 avl_create(&rt->rt_root, range_tree_seg_compare,
136 sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
137
138 rt->rt_ops = ops;
139 rt->rt_arg = arg;
140
141 if (rt->rt_ops != NULL)
142 rt->rt_ops->rtop_create(rt, rt->rt_arg);
143
144 return (rt);
145 }
146
147 void
148 range_tree_destroy(range_tree_t *rt)
149 {
150 VERIFY0(rt->rt_space);
151
152 if (rt->rt_ops != NULL)
153 rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
154
155 avl_destroy(&rt->rt_root);
156 kmem_free(rt, sizeof (*rt));
157 }
158
159 void
160 range_tree_add(void *arg, uint64_t start, uint64_t size)
161 {
162 range_tree_t *rt = arg;
163 avl_index_t where;
164 range_seg_t rsearch, *rs_before, *rs_after, *rs;
165 uint64_t end = start + size;
166 boolean_t merge_before, merge_after;
167
168 VERIFY(size != 0);
169
170 rsearch.rs_start = start;
171 rsearch.rs_end = end;
172 rs = avl_find(&rt->rt_root, &rsearch, &where);
173
174 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
175 zfs_panic_recover("zfs: allocating allocated segment"
176 "(offset=%llu size=%llu)\n",
177 (longlong_t)start, (longlong_t)size);
178 return;
179 }
180
181 /* Make sure we don't overlap with either of our neighbors */
182 VERIFY3P(rs, ==, NULL);
183
184 rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
185 rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
186
187 merge_before = (rs_before != NULL && rs_before->rs_end == start);
188 merge_after = (rs_after != NULL && rs_after->rs_start == end);
189
190 if (merge_before && merge_after) {
191 avl_remove(&rt->rt_root, rs_before);
192 if (rt->rt_ops != NULL) {
193 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
194 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
195 }
196
197 range_tree_stat_decr(rt, rs_before);
198 range_tree_stat_decr(rt, rs_after);
199
200 rs_after->rs_start = rs_before->rs_start;
201 kmem_cache_free(range_seg_cache, rs_before);
202 rs = rs_after;
203 } else if (merge_before) {
204 if (rt->rt_ops != NULL)
205 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
206
207 range_tree_stat_decr(rt, rs_before);
208
209 rs_before->rs_end = end;
210 rs = rs_before;
211 } else if (merge_after) {
212 if (rt->rt_ops != NULL)
213 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
214
215 range_tree_stat_decr(rt, rs_after);
216
217 rs_after->rs_start = start;
218 rs = rs_after;
219 } else {
220 rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
221 rs->rs_start = start;
222 rs->rs_end = end;
223 avl_insert(&rt->rt_root, rs, where);
224 }
225
226 if (rt->rt_ops != NULL)
227 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
228
229 range_tree_stat_incr(rt, rs);
230 rt->rt_space += size;
231 }
232
233 void
234 range_tree_remove(void *arg, uint64_t start, uint64_t size)
235 {
236 range_tree_t *rt = arg;
237 avl_index_t where;
238 range_seg_t rsearch, *rs, *newseg;
239 uint64_t end = start + size;
240 boolean_t left_over, right_over;
241
242 VERIFY3U(size, !=, 0);
243 VERIFY3U(size, <=, rt->rt_space);
244
245 rsearch.rs_start = start;
246 rsearch.rs_end = end;
247 rs = avl_find(&rt->rt_root, &rsearch, &where);
248
249 /* Make sure we completely overlap with someone */
250 if (rs == NULL) {
251 zfs_panic_recover("zfs: freeing free segment "
252 "(offset=%llu size=%llu)",
253 (longlong_t)start, (longlong_t)size);
254 return;
255 }
256 VERIFY3U(rs->rs_start, <=, start);
257 VERIFY3U(rs->rs_end, >=, end);
258
259 left_over = (rs->rs_start != start);
260 right_over = (rs->rs_end != end);
261
262 range_tree_stat_decr(rt, rs);
263
264 if (rt->rt_ops != NULL)
265 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
266
267 if (left_over && right_over) {
268 newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
269 newseg->rs_start = end;
270 newseg->rs_end = rs->rs_end;
271 range_tree_stat_incr(rt, newseg);
272
273 rs->rs_end = start;
274
275 avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
276 if (rt->rt_ops != NULL)
277 rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
278 } else if (left_over) {
279 rs->rs_end = start;
280 } else if (right_over) {
281 rs->rs_start = end;
282 } else {
283 avl_remove(&rt->rt_root, rs);
284 kmem_cache_free(range_seg_cache, rs);
285 rs = NULL;
286 }
287
288 if (rs != NULL) {
289 range_tree_stat_incr(rt, rs);
290
291 if (rt->rt_ops != NULL)
292 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
293 }
294
295 rt->rt_space -= size;
296 }
297
298 static range_seg_t *
299 range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
300 {
301 range_seg_t rsearch;
302 uint64_t end = start + size;
303
304 VERIFY(size != 0);
305
306 rsearch.rs_start = start;
307 rsearch.rs_end = end;
308 return (avl_find(&rt->rt_root, &rsearch, NULL));
309 }
310
311 static range_seg_t *
312 range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
313 {
314 range_seg_t *rs = range_tree_find_impl(rt, start, size);
315 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
316 return (rs);
317 return (NULL);
318 }
319
320 void
321 range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
322 {
323 range_seg_t *rs = range_tree_find(rt, off, size);
324 if (rs != NULL)
325 panic("segment already in tree; rs=%p", (void *)rs);
326 }
327
328 boolean_t
329 range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
330 {
331 return (range_tree_find(rt, start, size) != NULL);
332 }
333
334 /*
335 * Ensure that this range is not in the tree, regardless of whether
336 * it is currently in the tree.
337 */
338 void
339 range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
340 {
341 range_seg_t *rs;
342
343 if (size == 0)
344 return;
345
346 while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
347 uint64_t free_start = MAX(rs->rs_start, start);
348 uint64_t free_end = MIN(rs->rs_end, start + size);
349 range_tree_remove(rt, free_start, free_end - free_start);
350 }
351 }
352
353 void
354 range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
355 {
356 range_tree_t *rt;
357
358 ASSERT0(range_tree_space(*rtdst));
359 ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
360
361 rt = *rtsrc;
362 *rtsrc = *rtdst;
363 *rtdst = rt;
364 }
365
366 void
367 range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
368 {
369 range_seg_t *rs;
370 void *cookie = NULL;
371
372
373 if (rt->rt_ops != NULL)
374 rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
375
376 while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
377 if (func != NULL)
378 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
379 kmem_cache_free(range_seg_cache, rs);
380 }
381
382 bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
383 rt->rt_space = 0;
384 }
385
386 void
387 range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
388 {
389 range_seg_t *rs;
390
391 for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
392 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
393 }
394
395 uint64_t
396 range_tree_space(range_tree_t *rt)
397 {
398 return (rt->rt_space);
399 }
400
401 boolean_t
402 range_tree_is_empty(range_tree_t *rt)
403 {
404 ASSERT(rt != NULL);
405 return (range_tree_space(rt) == 0);
406 }
407
408 uint64_t
409 range_tree_min(range_tree_t *rt)
410 {
411 range_seg_t *rs = avl_first(&rt->rt_root);
412 return (rs != NULL ? rs->rs_start : 0);
413 }
414
415 uint64_t
416 range_tree_max(range_tree_t *rt)
417 {
418 range_seg_t *rs = avl_last(&rt->rt_root);
419 return (rs != NULL ? rs->rs_end : 0);
420 }
421
422 uint64_t
423 range_tree_span(range_tree_t *rt)
424 {
425 return (range_tree_max(rt) - range_tree_min(rt));
426 }