10592 misc. metaslab and vdev related ZoL bug fixes
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
--- old/usr/src/uts/common/fs/zfs/range_tree.c
+++ new/usr/src/uts/common/fs/zfs/range_tree.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
27 27 */
28 28
29 29 #include <sys/zfs_context.h>
30 30 #include <sys/spa.h>
31 31 #include <sys/dmu.h>
32 32 #include <sys/dnode.h>
33 33 #include <sys/zio.h>
34 34 #include <sys/range_tree.h>
35 35
36 36 kmem_cache_t *range_seg_cache;
37 37
38 38 void
39 39 range_tree_init(void)
40 40 {
41 41 ASSERT(range_seg_cache == NULL);
42 42 range_seg_cache = kmem_cache_create("range_seg_cache",
43 43 sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
44 44 }
45 45
46 46 void
47 47 range_tree_fini(void)
48 48 {
49 49 kmem_cache_destroy(range_seg_cache);
50 50 range_seg_cache = NULL;
51 51 }
52 52
53 53 void
54 54 range_tree_stat_verify(range_tree_t *rt)
55 55 {
56 56 range_seg_t *rs;
57 57 uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
58 58 int i;
59 59
60 60 for (rs = avl_first(&rt->rt_root); rs != NULL;
61 61 rs = AVL_NEXT(&rt->rt_root, rs)) {
62 62 uint64_t size = rs->rs_end - rs->rs_start;
63 63 int idx = highbit64(size) - 1;
64 64
65 65 hist[idx]++;
66 66 ASSERT3U(hist[idx], !=, 0);
67 67 }
68 68
69 69 for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
70 70 if (hist[i] != rt->rt_histogram[i]) {
  71  71 			zfs_dbgmsg("i=%d, hist=%p, hist[i]=%llu, rt_hist[i]=%llu",
72 72 i, hist, hist[i], rt->rt_histogram[i]);
73 73 }
74 74 VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
75 75 }
76 76 }
77 77
78 78 static void
79 79 range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
80 80 {
81 81 uint64_t size = rs->rs_end - rs->rs_start;
82 82 int idx = highbit64(size) - 1;
83 83
84 84 ASSERT(size != 0);
85 85 ASSERT3U(idx, <,
86 86 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
87 87
88 88 rt->rt_histogram[idx]++;
89 89 ASSERT3U(rt->rt_histogram[idx], !=, 0);
90 90 }
91 91
92 92 static void
93 93 range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
94 94 {
95 95 uint64_t size = rs->rs_end - rs->rs_start;
96 96 int idx = highbit64(size) - 1;
97 97
98 98 ASSERT(size != 0);
99 99 ASSERT3U(idx, <,
100 100 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
101 101
102 102 ASSERT3U(rt->rt_histogram[idx], !=, 0);
103 103 rt->rt_histogram[idx]--;
104 104 }
105 105
106 106 /*
107 107 * NOTE: caller is responsible for all locking.
108 108 */
109 109 static int
110 110 range_tree_seg_compare(const void *x1, const void *x2)
111 111 {
112 112 const range_seg_t *r1 = x1;
113 113 const range_seg_t *r2 = x2;
114 114
115 115 if (r1->rs_start < r2->rs_start) {
116 116 if (r1->rs_end > r2->rs_start)
117 117 return (0);
118 118 return (-1);
119 119 }
120 120 if (r1->rs_start > r2->rs_start) {
121 121 if (r1->rs_start < r2->rs_end)
122 122 return (0);
123 123 return (1);
124 124 }
125 125 return (0);
126 126 }
127 127
128 128 range_tree_t *
129 129 range_tree_create(range_tree_ops_t *ops, void *arg)
130 130 {
131 131 range_tree_t *rt;
132 132
133 133 rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
134 134
135 135 avl_create(&rt->rt_root, range_tree_seg_compare,
136 136 sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
137 137
138 138 rt->rt_ops = ops;
139 139 rt->rt_arg = arg;
140 140
141 141 if (rt->rt_ops != NULL)
142 142 rt->rt_ops->rtop_create(rt, rt->rt_arg);
143 143
144 144 return (rt);
145 145 }
146 146
147 147 void
148 148 range_tree_destroy(range_tree_t *rt)
149 149 {
150 150 VERIFY0(rt->rt_space);
151 151
152 152 if (rt->rt_ops != NULL)
153 153 rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
154 154
155 155 avl_destroy(&rt->rt_root);
156 156 kmem_free(rt, sizeof (*rt));
157 157 }
158 158
159 159 void
160 160 range_tree_add(void *arg, uint64_t start, uint64_t size)
161 161 {
162 162 range_tree_t *rt = arg;
163 163 avl_index_t where;
164 164 range_seg_t rsearch, *rs_before, *rs_after, *rs;
165 165 uint64_t end = start + size;
166 166 boolean_t merge_before, merge_after;
167 167
168 168 VERIFY(size != 0);
169 169
170 170 rsearch.rs_start = start;
171 171 rsearch.rs_end = end;
172 172 rs = avl_find(&rt->rt_root, &rsearch, &where);
173 173
174 174 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
 175 175 		zfs_panic_recover("zfs: allocating allocated segment "
176 176 "(offset=%llu size=%llu)\n",
177 177 (longlong_t)start, (longlong_t)size);
178 178 return;
179 179 }
180 180
181 181 /* Make sure we don't overlap with either of our neighbors */
182 182 VERIFY3P(rs, ==, NULL);
183 183
184 184 rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
185 185 rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
186 186
187 187 merge_before = (rs_before != NULL && rs_before->rs_end == start);
188 188 merge_after = (rs_after != NULL && rs_after->rs_start == end);
189 189
190 190 if (merge_before && merge_after) {
191 191 avl_remove(&rt->rt_root, rs_before);
192 192 if (rt->rt_ops != NULL) {
193 193 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
194 194 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
195 195 }
196 196
197 197 range_tree_stat_decr(rt, rs_before);
198 198 range_tree_stat_decr(rt, rs_after);
199 199
200 200 rs_after->rs_start = rs_before->rs_start;
201 201 kmem_cache_free(range_seg_cache, rs_before);
202 202 rs = rs_after;
203 203 } else if (merge_before) {
204 204 if (rt->rt_ops != NULL)
205 205 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
206 206
207 207 range_tree_stat_decr(rt, rs_before);
208 208
209 209 rs_before->rs_end = end;
210 210 rs = rs_before;
211 211 } else if (merge_after) {
212 212 if (rt->rt_ops != NULL)
213 213 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
214 214
215 215 range_tree_stat_decr(rt, rs_after);
216 216
217 217 rs_after->rs_start = start;
218 218 rs = rs_after;
219 219 } else {
220 220 rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
221 221 rs->rs_start = start;
222 222 rs->rs_end = end;
223 223 avl_insert(&rt->rt_root, rs, where);
224 224 }
225 225
226 226 if (rt->rt_ops != NULL)
227 227 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
228 228
229 229 range_tree_stat_incr(rt, rs);
230 230 rt->rt_space += size;
231 231 }
232 232
233 233 void
234 234 range_tree_remove(void *arg, uint64_t start, uint64_t size)
235 235 {
236 236 range_tree_t *rt = arg;
237 237 avl_index_t where;
238 238 range_seg_t rsearch, *rs, *newseg;
239 239 uint64_t end = start + size;
240 240 boolean_t left_over, right_over;
241 241
242 242 VERIFY3U(size, !=, 0);
243 243 VERIFY3U(size, <=, rt->rt_space);
244 244
245 245 rsearch.rs_start = start;
246 246 rsearch.rs_end = end;
247 247 rs = avl_find(&rt->rt_root, &rsearch, &where);
248 248
249 249 /* Make sure we completely overlap with someone */
250 250 if (rs == NULL) {
251 251 zfs_panic_recover("zfs: freeing free segment "
252 252 "(offset=%llu size=%llu)",
253 253 (longlong_t)start, (longlong_t)size);
254 254 return;
255 255 }
256 256 VERIFY3U(rs->rs_start, <=, start);
257 257 VERIFY3U(rs->rs_end, >=, end);
258 258
259 259 left_over = (rs->rs_start != start);
260 260 right_over = (rs->rs_end != end);
261 261
262 262 range_tree_stat_decr(rt, rs);
263 263
264 264 if (rt->rt_ops != NULL)
265 265 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
266 266
267 267 if (left_over && right_over) {
268 268 newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
269 269 newseg->rs_start = end;
270 270 newseg->rs_end = rs->rs_end;
271 271 range_tree_stat_incr(rt, newseg);
272 272
273 273 rs->rs_end = start;
274 274
275 275 avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
276 276 if (rt->rt_ops != NULL)
277 277 rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
278 278 } else if (left_over) {
279 279 rs->rs_end = start;
280 280 } else if (right_over) {
281 281 rs->rs_start = end;
282 282 } else {
283 283 avl_remove(&rt->rt_root, rs);
284 284 kmem_cache_free(range_seg_cache, rs);
285 285 rs = NULL;
286 286 }
287 287
288 288 if (rs != NULL) {
289 289 range_tree_stat_incr(rt, rs);
290 290
291 291 if (rt->rt_ops != NULL)
292 292 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
293 293 }
294 294
295 295 rt->rt_space -= size;
296 296 }
297 297
298 298 static range_seg_t *
299 299 range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
300 300 {
301 301 range_seg_t rsearch;
302 302 uint64_t end = start + size;
303 303
304 304 VERIFY(size != 0);
305 305
306 306 rsearch.rs_start = start;
307 307 rsearch.rs_end = end;
308 308 return (avl_find(&rt->rt_root, &rsearch, NULL));
309 309 }
310 310
 310 310
311 311 static range_seg_t *
312 312 range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
313 313 {
314 314 range_seg_t *rs = range_tree_find_impl(rt, start, size);
315 315 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
316 316 return (rs);
317 317 return (NULL);
318 318 }
319 319
320 320 void
321 -range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
321 +range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
322 322 {
323 - range_seg_t *rs;
324 -
325 - rs = range_tree_find(rt, off, size);
323 + range_seg_t *rs = range_tree_find(rt, off, size);
326 324 if (rs != NULL)
327 - panic("freeing free block; rs=%p", (void *)rs);
325 + panic("segment already in tree; rs=%p", (void *)rs);
328 326 }
329 327
330 328 boolean_t
331 329 range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
332 330 {
333 331 return (range_tree_find(rt, start, size) != NULL);
334 332 }
335 333
336 334 /*
337 335 * Ensure that this range is not in the tree, regardless of whether
338 336 * it is currently in the tree.
339 337 */
340 338 void
341 339 range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
342 340 {
343 341 range_seg_t *rs;
344 342
345 343 if (size == 0)
346 344 return;
347 345
348 346 while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
349 347 uint64_t free_start = MAX(rs->rs_start, start);
350 348 uint64_t free_end = MIN(rs->rs_end, start + size);
351 349 range_tree_remove(rt, free_start, free_end - free_start);
352 350 }
353 351 }
354 352
355 353 void
356 354 range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
357 355 {
358 356 range_tree_t *rt;
359 357
360 358 ASSERT0(range_tree_space(*rtdst));
361 359 ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
362 360
363 361 rt = *rtsrc;
364 362 *rtsrc = *rtdst;
365 363 *rtdst = rt;
366 364 }
367 365
368 366 void
369 367 range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
370 368 {
371 369 range_seg_t *rs;
372 370 void *cookie = NULL;
373 371
374 372
375 373 if (rt->rt_ops != NULL)
376 374 rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
377 375
378 376 while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
379 377 if (func != NULL)
380 378 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
381 379 kmem_cache_free(range_seg_cache, rs);
382 380 }
383 381
384 382 bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
385 383 rt->rt_space = 0;
386 384 }
387 385
388 386 void
389 387 range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
390 388 {
391 389 range_seg_t *rs;
392 390
393 391 for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
394 392 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
395 393 }
396 394
397 395 uint64_t
398 396 range_tree_space(range_tree_t *rt)
399 397 {
400 398 return (rt->rt_space);
401 399 }
402 400
403 401 boolean_t
404 402 range_tree_is_empty(range_tree_t *rt)
405 403 {
406 404 ASSERT(rt != NULL);
407 405 return (range_tree_space(rt) == 0);
408 406 }
409 407
410 408 uint64_t
411 409 range_tree_min(range_tree_t *rt)
412 410 {
413 411 range_seg_t *rs = avl_first(&rt->rt_root);
414 412 return (rs != NULL ? rs->rs_start : 0);
415 413 }
416 414
417 415 uint64_t
418 416 range_tree_max(range_tree_t *rt)
419 417 {
420 418 range_seg_t *rs = avl_last(&rt->rt_root);
421 419 return (rs != NULL ? rs->rs_end : 0);
422 420 }
423 421
424 422 uint64_t
425 423 range_tree_span(range_tree_t *rt)
426 424 {
427 425 return (range_tree_max(rt) - range_tree_min(rt));
428 426 }
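
A note on the code above: the three-way coalescing in range_tree_add() (merge before and after, merge before only, merge after only, or a fresh segment) and the mirror-image splitting in range_tree_remove() are the subtlest parts of this file; the only interface change in this webrev is the rename of range_tree_verify() to range_tree_verify_not_present(), whose panic message now says what the function actually checks. The standalone sketch below models just the coalescing and splitting behavior in userland. It is illustrative only: a sorted singly-linked list stands in for the AVL tree, the histogram, rt_ops callbacks, and locking are omitted, and the seg_t/seg_add/seg_remove names are invented for the example.

/*
 * rt_demo.c: a minimal userland model of the coalescing and splitting
 * done by range_tree_add() and range_tree_remove() above.
 * Build with: cc -o rt_demo rt_demo.c
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct seg {
	uint64_t start, end;		/* half-open interval [start, end) */
	struct seg *next;
} seg_t;

/* Add [start, start + size), coalescing with adjacent segments. */
static void
seg_add(seg_t **head, uint64_t start, uint64_t size)
{
	uint64_t end = start + size;
	seg_t **pp = head;

	while (*pp != NULL && (*pp)->end < start)
		pp = &(*pp)->next;
	/* Adding an already-added range is a bug, as in the kernel. */
	assert(*pp == NULL || end <= (*pp)->start || (*pp)->end == start);

	if (*pp != NULL && (*pp)->end == start) {
		(*pp)->end = end;			/* merge before */
		if ((*pp)->next != NULL && (*pp)->next->start == end) {
			seg_t *after = (*pp)->next;	/* ... and after */
			(*pp)->end = after->end;
			(*pp)->next = after->next;
			free(after);
		}
	} else if (*pp != NULL && (*pp)->start == end) {
		(*pp)->start = start;			/* merge after */
	} else {
		seg_t *rs = malloc(sizeof (*rs));	/* new segment */
		rs->start = start;
		rs->end = end;
		rs->next = *pp;
		*pp = rs;
	}
}

/* Remove [start, start + size); it must lie within one segment. */
static void
seg_remove(seg_t **head, uint64_t start, uint64_t size)
{
	uint64_t end = start + size;
	seg_t **pp = head;

	while (*pp != NULL && (*pp)->end <= start)
		pp = &(*pp)->next;
	/* Freeing a free range is a bug, as in the kernel. */
	assert(*pp != NULL && (*pp)->start <= start && end <= (*pp)->end);

	if ((*pp)->start < start && end < (*pp)->end) {
		seg_t *rs = malloc(sizeof (*rs));	/* split in two */
		rs->start = end;
		rs->end = (*pp)->end;
		rs->next = (*pp)->next;
		(*pp)->end = start;
		(*pp)->next = rs;
	} else if ((*pp)->start < start) {
		(*pp)->end = start;			/* trim the right */
	} else if (end < (*pp)->end) {
		(*pp)->start = end;			/* trim the left */
	} else {
		seg_t *rs = *pp;			/* exact match */
		*pp = rs->next;
		free(rs);
	}
}

int
main(void)
{
	seg_t *head = NULL;

	seg_add(&head, 0, 10);		/* [0, 10) */
	seg_add(&head, 20, 10);		/* [0, 10) [20, 30) */
	seg_add(&head, 10, 10);		/* bridges both: [0, 30) */
	seg_remove(&head, 5, 10);	/* splits: [0, 5) [15, 30) */

	while (head != NULL) {
		seg_t *rs = head;
		printf("[%llu, %llu)\n", (unsigned long long)rs->start,
		    (unsigned long long)rs->end);
		head = rs->next;
		free(rs);
	}
	return (0);
}

Run, the sketch prints [0, 5) and [15, 30): the third seg_add() call bridges its two neighbors (the merge_before && merge_after path in range_tree_add()), and seg_remove() then carves a hole out of the middle (the left_over && right_over path in range_tree_remove()).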