 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

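/*
 * A range tree tracks a set of non-overlapping [start, end) segments.
 * Segments that become adjacent are merged on add, and segments are
 * shrunk or split on remove.  All range_seg_t nodes are allocated from
 * the cache below.
 */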
kmem_cache_t *range_seg_cache;

void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}

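/*
 * Recompute the size histogram from the segments actually in the tree
 * and verify that it matches rt_histogram.
 */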
void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

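/*
 * rt_histogram[idx] counts the segments whose size falls in the range
 * [2^idx, 2^(idx+1)), i.e. idx == highbit64(size) - 1.
 */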
static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

/*
 * NOTE: caller is responsible for all locking.
 */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = x1;
	const range_seg_t *r2 = x2;

	if (r1->rs_start < r2->rs_start) {
		if (r1->rs_end > r2->rs_start)
			return (0);
		return (-1);
	}
	if (r1->rs_start > r2->rs_start) {
		if (r1->rs_start < r2->rs_end)
			return (0);
		return (1);
	}
	return (0);
}

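/*
 * Create an empty range tree.  If ops is non-NULL, its callbacks are
 * invoked as the tree is created and destroyed and as segments are
 * added and removed.
 */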
range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg)
{
	range_tree_t *rt;

	rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_ops = ops;
	rt->rt_arg = arg;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

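/*
 * Add the range [start, start + size) to the tree.  The range must not
 * overlap any existing segment; if it abuts a neighbor on either side,
 * the segments are merged into one.
 */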
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY(rs == NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}

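/*
 * Remove the range [start, start + size) from the tree.  The range must
 * be fully contained within a single existing segment, which is shrunk,
 * split in two, or removed entirely as appropriate.
 */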
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}
	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		rs->rs_end = start;
	} else if (right_over) {
		rs->rs_start = end;
	} else {
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}

static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	avl_index_t where;
	range_seg_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, &where));
}

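/*
 * Return the segment that fully contains [start, start + size), or NULL
 * if there is no such segment.
 */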
static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}

void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs;

	rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("freeing free block; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	if (size == 0)
		return;

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs->rs_start, start);
		uint64_t free_end = MIN(rs->rs_end, start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

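/*
 * Swap the contents of the two range trees; the destination tree must
 * be empty.
 */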
void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}

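/*
 * Remove all segments from the tree, invoking func (if non-NULL) on each
 * one before it is freed.  To visit each segment without modifying the
 * tree, use range_tree_walk() instead.
 */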
void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;
	void *cookie = NULL;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
		if (func != NULL)
			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
		kmem_cache_free(range_seg_cache, rs);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;

	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

/*
 * Range trees are tree-based data structures that can be used to
 * track free space or generally any space allocation information.
 * A range tree keeps track of individual segments and automatically
 * provides facilities such as adjacent extent merging and extent
 * splitting in response to range add/remove requests.
 *
 * A range tree starts out completely empty, with no segments in it.
 * Adding an allocation via range_tree_add to the range tree can either:
 * 1) create a new extent
 * 2) extend an adjacent extent
 * 3) merge two adjacent extents
 * Conversely, removing an allocation via range_tree_remove can:
 * 1) completely remove an extent
 * 2) shorten an extent (if the allocation was near one of its ends)
 * 3) split an extent into two extents, in effect punching a hole
 *
 * A range tree is also capable of 'bridging' gaps when adding
 * allocations. This is useful for cases when close proximity of
 * allocations is an important detail that needs to be represented
 * in the range tree. See range_tree_set_gap(). The default behavior
 * is not to bridge gaps (i.e. the maximum allowed gap size is 0).
 *
 * In order to traverse a range tree, use either the range_tree_walk
 * or range_tree_vacate functions.
 *
 * To obtain more accurate information on individual segment
 * operations that the range tree performs "under the hood", you can
 * specify a set of callbacks by passing a range_tree_ops_t structure
 * to the range_tree_create and range_tree_create_custom functions.
 * Any callbacks that are non-NULL are then called at the appropriate
 * times.
 *
 * It is possible to store additional custom information with each
 * segment. This is typically useful when using one or more of the
 * operation callbacks mentioned above. To do so, use the
 * range_tree_create_custom function to create your range tree and
 * pass a custom kmem cache allocator. This allocator must allocate
 * objects of at least sizeof (range_seg_t). The range tree will use
 * the start of that object as a range_seg_t to keep its internal
 * data structures and you can use the remainder of the object to
 * store arbitrary additional fields as necessary.
 */
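
/*
 * Illustrative sketch of a typical consumer (hypothetical; `lock' is an
 * initialized kmutex_t supplied by the caller and no callbacks are used):
 *
 *	range_tree_t *rt = range_tree_create(NULL, NULL, &lock);
 *
 *	mutex_enter(&lock);
 *	range_tree_add(rt, 0x1000, 0x1000);	adds [0x1000, 0x2000)
 *	range_tree_add(rt, 0x2000, 0x1000);	merges into [0x1000, 0x3000)
 *	range_tree_remove(rt, 0x1800, 0x100);	splits the segment in two
 *	range_tree_vacate(rt, NULL, NULL);	empties the tree
 *	mutex_exit(&lock);
 *
 *	range_tree_destroy(rt);
 */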

kmem_cache_t *range_seg_cache;

void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}

void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

/*
 * Sets the inter-segment gap. Subsequent adding of segments will examine
 * if an adjacent segment is less than or equal to `gap' apart. If it is,
 * the segments will be joined together, in effect 'bridging' the gap.
 */
void
range_tree_set_gap(range_tree_t *rt, uint64_t gap)
{
	ASSERT(MUTEX_HELD(rt->rt_lock));
	rt->rt_gap = gap;
}

/*
 * Changes out the lock used by the range tree. Useful when you are moving
 * the range tree between containing structures without having to recreate
 * it. Both the old and new locks must be held by the caller.
 */
void
range_tree_set_lock(range_tree_t *rt, kmutex_t *lp)
{
	ASSERT(MUTEX_HELD(rt->rt_lock) && MUTEX_HELD(lp));
	rt->rt_lock = lp;
}

static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT(MUTEX_HELD(rt->rt_lock));
	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT(MUTEX_HELD(rt->rt_lock));
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

/*
 * NOTE: caller is responsible for all locking.
 */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = x1;
	const range_seg_t *r2 = x2;

	if (r1->rs_start < r2->rs_start) {
		if (r1->rs_end > r2->rs_start)
			return (0);
		return (-1);
	}
	if (r1->rs_start > r2->rs_start) {
		if (r1->rs_start < r2->rs_end)
			return (0);
		return (1);
	}
	return (0);
}

range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp)
{
	range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_lock = lp;
	rt->rt_ops = ops;
	rt->rt_arg = arg;

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

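/*
 * Add the range [start, start + size) to the tree and credit `fill' to
 * the resulting segment.  If a gap size has been set via
 * range_tree_set_gap(), the new range may be bridged with a neighboring
 * segment that is at most rt_gap away; it may also partially overlap an
 * existing segment, in which case only the non-overlapping portion is
 * added (and `fill' must be 0).
 */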
void
range_tree_add_fill(void *arg, uint64_t start, uint64_t size, uint64_t fill)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size, gap_extra = 0;
	boolean_t merge_before, merge_after;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end &&
	    rt->rt_gap == 0) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	if (rt->rt_gap != 0) {
		if (rs != NULL) {
			if (rs->rs_start <= start && rs->rs_end >= end) {
				if (rt->rt_ops != NULL &&
				    rt->rt_ops->rtop_remove != NULL)
					rt->rt_ops->rtop_remove(rt, rs,
					    rt->rt_arg);
				rs->rs_fill += fill;
				if (rt->rt_ops != NULL &&
				    rt->rt_ops->rtop_add != NULL) {
					rt->rt_ops->rtop_add(rt, rs,
					    rt->rt_arg);
				}
				return;
			}
			ASSERT0(fill);
			if (rs->rs_start < start) {
				ASSERT3U(end, >, rs->rs_end);
				range_tree_add(rt, rs->rs_end, end -
				    rs->rs_end);
			} else {
				ASSERT3U(rs->rs_start, >, start);
				range_tree_add(rt, start, rs->rs_start - start);
			}
			return;
		}
	} else {
		/* Make sure we don't overlap with either of our neighbors */
		VERIFY(rs == NULL);
	}

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL &&
	    start - rs_before->rs_end <= rt->rt_gap);
	merge_after = (rs_after != NULL &&
	    rs_after->rs_start - end <= rt->rt_gap);
	if (rt->rt_gap != 0) {
		if (merge_before)
			gap_extra += start - rs_before->rs_end;
		if (merge_after)
			gap_extra += rs_after->rs_start - end;
	}

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		rs_after->rs_fill += rs_before->rs_fill + fill;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs_before->rs_fill += fill;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs_after->rs_fill += fill;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		rs->rs_fill = fill;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size + gap_extra;
}

void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_add_fill(arg, start, size, 0);
}

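/*
 * Remove the range [start, start + size) from the tree.  If
 * partial_overlap is set, the range may span several segments (or free
 * space) and every overlapping piece is removed; otherwise it must be
 * fully contained within a single segment.  `fill' is the amount of fill
 * being removed, and `fill_left' is the fill credited to the left-hand
 * piece when a segment is split.
 */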
static void
range_tree_remove_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill,
    uint64_t fill_left, boolean_t partial_overlap)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY3U(size, !=, 0);
	if (!partial_overlap) {
		VERIFY3U(size, <=, rt->rt_space);
	} else {
		VERIFY0(fill);
		VERIFY0(fill_left);
	}

	rsearch.rs_start = start;
	rsearch.rs_end = end;

	while ((rs = avl_find(&rt->rt_root, &rsearch, &where)) != NULL ||
	    !partial_overlap) {
		uint64_t overlap_sz;

		if (partial_overlap) {
			if (rs->rs_start <= start && rs->rs_end >= end)
				overlap_sz = size;
			else if (rs->rs_start > start && rs->rs_end < end)
				overlap_sz = rs->rs_end - rs->rs_start;
			else if (rs->rs_end < end)
				overlap_sz = rs->rs_end - start;
			else /* rs->rs_start > start */
				overlap_sz = end - rs->rs_start;
		} else {
			/* Make sure we completely overlapped with someone */
			if (rs == NULL) {
				zfs_panic_recover("zfs: freeing free segment "
				    "(offset=%llu size=%llu)",
				    (longlong_t)start, (longlong_t)size);
				return;
			}
			VERIFY3U(rs->rs_start, <=, start);
			VERIFY3U(rs->rs_end, >=, end);
			overlap_sz = size;
		}

		left_over = (rs->rs_start < start);
		right_over = (rs->rs_end > end);

		range_tree_stat_decr(rt, rs);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

		if (left_over && right_over) {
			newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
			newseg->rs_start = end;
			newseg->rs_end = rs->rs_end;
			ASSERT3U(rs->rs_fill, >=, (fill + fill_left));
			newseg->rs_fill = rs->rs_fill - (fill + fill_left);
			range_tree_stat_incr(rt, newseg);

			rs->rs_end = start;
			rs->rs_fill = fill_left;

			avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
			if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
				rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
		} else if (left_over) {
			rs->rs_end = start;
			ASSERT3U(rs->rs_fill, >=, fill);
			rs->rs_fill -= fill;
		} else if (right_over) {
			rs->rs_start = end;
			ASSERT3U(rs->rs_fill, >=, fill);
			rs->rs_fill -= fill;
		} else {
			ASSERT3U(rs->rs_fill, ==, fill);
			ASSERT(fill == 0 || !partial_overlap);
			avl_remove(&rt->rt_root, rs);
			kmem_cache_free(range_seg_cache, rs);
			rs = NULL;
		}

		if (rs != NULL) {
			range_tree_stat_incr(rt, rs);

			if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
				rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
		}

		rt->rt_space -= overlap_sz;
		if (!partial_overlap) {
			/*
			 * There can't be any more segments overlapping with
			 * us, so no sense in performing an extra search.
			 */
			break;
		}
	}
}

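/*
 * Convenience wrappers around range_tree_remove_impl().
 */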
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(arg, start, size, 0, 0, B_FALSE);
}

void
range_tree_remove_overlap(void *arg, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(arg, start, size, 0, 0, B_TRUE);
}

void
range_tree_remove_fill(void *arg, uint64_t start, uint64_t size,
    uint64_t fill, uint64_t remain_left)
{
	range_tree_remove_impl(arg, start, size, fill, remain_left, B_FALSE);
}

static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	avl_index_t where;
	range_seg_t rsearch;
	uint64_t end = start + size;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, &where));
}

void *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}

/*
 * Given an extent start offset and size, will look through the provided
 * range tree and find a suitable start offset (starting at `start') such
 * that the requested extent _doesn't_ overlap with any range segment in
 * the range tree.
 */
uint64_t
range_tree_find_gap(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;
	while ((rs = range_tree_find_impl(rt, start, size)) != NULL)
		start = rs->rs_end;
	return (start);
}

void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs;

	mutex_enter(rt->rt_lock);
	rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("freeing free block; rs=%p", (void *)rs);
	mutex_exit(rt->rt_lock);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs->rs_start, start);
		uint64_t free_end = MIN(rs->rs_end, start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT(MUTEX_HELD((*rtsrc)->rt_lock));
	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}

void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;
	void *cookie = NULL;

	ASSERT(MUTEX_HELD(rt->rt_lock));

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
		if (func != NULL)
			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
		kmem_cache_free(range_seg_cache, rs);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;

	ASSERT(MUTEX_HELD(rt->rt_lock));

	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

range_seg_t *
range_tree_first(range_tree_t *rt)
{
	ASSERT(MUTEX_HELD(rt->rt_lock));
	return (avl_first(&rt->rt_root));
}

uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}