Print this page
NEX-3165 need some dedup improvements
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
re #12611 rb4105 zpool import panic in ddt_zap_count()
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/zfs/sys/ddt.h
+++ new/usr/src/uts/common/fs/zfs/sys/ddt.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23 24 * Copyright (c) 2016 by Delphix. All rights reserved.
24 25 */
25 26
26 27 #ifndef _SYS_DDT_H
27 28 #define _SYS_DDT_H
28 29
29 30 #include <sys/sysmacros.h>
30 31 #include <sys/types.h>
31 32 #include <sys/fs/zfs.h>
32 33 #include <sys/zio.h>
33 34 #include <sys/dmu.h>
34 35
35 36 #ifdef __cplusplus
36 37 extern "C" {
37 38 #endif
38 39
39 40 struct abd;
40 41
41 42 /*
42 43 * On-disk DDT formats, in the desired search order (newest version first).
43 44 */
44 45 enum ddt_type {
45 46 DDT_TYPE_ZAP = 0,
46 47 DDT_TYPES
47 48 };
48 49
|
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
49 50 /*
50 51 * DDT classes, in the desired search order (highest replication level first).
51 52 */
52 53 enum ddt_class {
53 54 DDT_CLASS_DITTO = 0,
54 55 DDT_CLASS_DUPLICATE,
55 56 DDT_CLASS_UNIQUE,
56 57 DDT_CLASSES
57 58 };
58 59
60 +/*
61 + * Tracks whether a DDE is loading or already loaded, and which
62 + * entries were removed from the dedup path to support the dedup ceiling.
63 + */
64 +enum dde_state { /* each value is a distinct single bit, so values can be OR-ed into one mask */
65 + DDE_LOADING = (1 << 0), /* entry is loading */
66 + DDE_LOADED = (1 << 1), /* entry is already loaded */
67 + DDE_NEW = (1 << 2), /* NOTE(review): presumably a newly created entry -- confirm in ddt.c */
68 + DDE_DONT_SYNC = (1 << 3), /* NOTE(review): presumably the removed-from-dedup-path case -- confirm in ddt.c */
69 +};
70 +
59 71 #define DDT_TYPE_CURRENT 0
60 72
61 73 #define DDT_COMPRESS_BYTEORDER_MASK 0x80
62 74 #define DDT_COMPRESS_FUNCTION_MASK 0x7f
63 75
64 76 /*
65 77 * On-disk ddt entry: key (name) and physical storage (value).
66 78 */
67 79 typedef struct ddt_key {
68 80 zio_cksum_t ddk_cksum; /* 256-bit block checksum */
69 81 /*
70 82 * Encoded with logical & physical size, and compression, as follows:
71 83 * +-------+-------+-------+-------+-------+-------+-------+-------+
72 84 * |   0   |   0   |   0   | comp  |     PSIZE     |     LSIZE     |
73 85 * +-------+-------+-------+-------+-------+-------+-------+-------+
74 86 */
75 87 uint64_t ddk_prop;
76 88 } ddt_key_t;
77 89
78 90 #define DDK_GET_LSIZE(ddk) \
79 91 BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
80 92 #define DDK_SET_LSIZE(ddk, x) \
81 93 BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
82 94
83 95 #define DDK_GET_PSIZE(ddk) \
84 96 BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
85 97 #define DDK_SET_PSIZE(ddk, x) \
86 98 BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
87 99
88 100 #define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8)
89 101 #define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x)
90 102
91 103 #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
92 104
93 105 typedef struct ddt_phys {
94 106 dva_t ddp_dva[SPA_DVAS_PER_BP];
95 107 uint64_t ddp_refcnt;
96 108 uint64_t ddp_phys_birth;
97 109 } ddt_phys_t;
98 110
99 111 enum ddt_phys_type {
100 112 DDT_PHYS_DITTO = 0,
101 113 DDT_PHYS_SINGLE = 1,
102 114 DDT_PHYS_DOUBLE = 2,
103 115 DDT_PHYS_TRIPLE = 3,
|
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
104 116 DDT_PHYS_TYPES
105 117 };
106 118
107 119 /*
108 120 * In-core ddt entry
109 121 */
110 122 struct ddt_entry {
111 123 ddt_key_t dde_key;
112 124 ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* one slot per enum ddt_phys_type */
113 125 zio_t *dde_lead_zio[DDT_PHYS_TYPES]; /* one slot per enum ddt_phys_type */
126 + ddt_stat_t dde_lkstat; /* NOTE(review): new per-entry stat; how it is filled is not visible here -- confirm */
114 127 struct abd *dde_repair_abd;
115 128 enum ddt_type dde_type;
116 129 enum ddt_class dde_class;
117 - uint8_t dde_loading;
118 - uint8_t dde_loaded;
130 + uint8_t dde_state; /* takes the place of the removed dde_loading/dde_loaded bytes; presumably enum dde_state bits -- confirm */
119 131 kcondvar_t dde_cv;
132 + kmutex_t dde_lock; /* NOTE(review): presumably the lock behind dde_enter()/dde_exit() -- confirm */
120 133 avl_node_t dde_node;
121 134 };
122 135
136 +#define DDT_HASHSZ 0x100 /* sizes ddt_lock[] and ddt_tree[]; must stay a power of two for the mask in DDT_HASHFN */
137 +#define DDT_HASHFN(csum) (*((uint8_t *)&(csum).zc_word[0]) & \
138 + (DDT_HASHSZ - 1)) /* reads the first in-memory byte of zc_word[0], so which checksum bits are used depends on host byte order; result is 0..DDT_HASHSZ-1 */
139 +
123 140 /*
124 141 * In-core ddt
125 142 */
126 143 struct ddt {
127 - kmutex_t ddt_lock;
128 - avl_tree_t ddt_tree;
144 + kmutex_t ddt_lock[DDT_HASHSZ]; /* one mutex per hash bucket; replaces the single ddt_lock */
145 + avl_tree_t ddt_tree[DDT_HASHSZ]; /* one AVL tree per hash bucket; replaces the single ddt_tree */
146 + kmutex_t ddt_repair_lock; /* NOTE(review): presumably guards ddt_repair_tree -- confirm in ddt.c */
129 147 avl_tree_t ddt_repair_tree;
130 148 enum zio_checksum ddt_checksum;
131 149 spa_t *ddt_spa;
132 150 objset_t *ddt_os;
133 151 uint64_t ddt_stat_object;
134 152 uint64_t ddt_object[DDT_TYPES][DDT_CLASSES]; /* indexed [enum ddt_type][enum ddt_class] */
135 153 ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES]; /* indexed [enum ddt_type][enum ddt_class] */
136 154 ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES]; /* indexed [enum ddt_type][enum ddt_class] */
137 155 ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES]; /* indexed [enum ddt_type][enum ddt_class] */
138 156 avl_node_t ddt_node;
139 157 };
140 158
141 159 /*
142 160 * In-core and on-disk bookmark for DDT walks
143 161 */
144 162 typedef struct ddt_bookmark {
145 163 uint64_t ddb_class;
146 164 uint64_t ddb_type;
147 165 uint64_t ddb_checksum;
148 166 uint64_t ddb_cursor;
149 167 } ddt_bookmark_t;
150 168
151 169 /*
152 170 * Ops vector to access a specific DDT object type.
153 171 */
154 172 typedef struct ddt_ops {
155 173 char ddt_op_name[32];
156 174 int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
157 175 boolean_t prehash);
|
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
158 176 int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
159 177 int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
160 178 void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
161 179 ddt_entry_t *dde);
162 180 int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
163 181 dmu_tx_t *tx);
164 182 int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
165 183 dmu_tx_t *tx);
166 184 int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
167 185 uint64_t *walk);
168 - uint64_t (*ddt_op_count)(objset_t *os, uint64_t object);
186 + int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
169 187 } ddt_ops_t;
170 188
171 189 #define DDT_NAMELEN 80
172 190
173 191 extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
174 192 enum ddt_class class, char *name);
175 193 extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
176 194 enum ddt_class class, uint64_t *walk, ddt_entry_t *dde);
177 -extern uint64_t ddt_object_count(ddt_t *ddt, enum ddt_type type,
178 - enum ddt_class class);
195 +extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
196 + enum ddt_class class, uint64_t *count);
179 197 extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
180 198 enum ddt_class class, dmu_object_info_t *);
181 199 extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
182 200 enum ddt_class class);
183 201
184 202 extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
185 203 uint64_t txg);
186 204 extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
187 205 const ddt_phys_t *ddp, blkptr_t *bp);
188 206
189 207 extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
190 208
191 209 extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
192 210 extern void ddt_phys_clear(ddt_phys_t *ddp);
193 211 extern void ddt_phys_addref(ddt_phys_t *ddp);
194 212 extern void ddt_phys_decref(ddt_phys_t *ddp);
195 213 extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
196 214 uint64_t txg);
197 215 extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
198 216 extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
199 217
200 218 extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
201 219
202 220 extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
203 221 extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
204 222 extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
205 223 extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
206 224 extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
207 225 extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
208 226
209 227 extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
|
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
210 228 extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
211 229
212 230 extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
213 231 ddt_phys_t *ddp_willref);
214 232 extern int ddt_ditto_copies_present(ddt_entry_t *dde);
215 233
216 234 extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
217 235 extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
218 236
219 237 extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
220 -extern void ddt_enter(ddt_t *ddt);
221 -extern void ddt_exit(ddt_t *ddt);
238 +extern void ddt_enter(ddt_t *ddt, uint8_t hash);
239 +extern void ddt_exit(ddt_t *ddt, uint8_t hash);
240 +extern void dde_enter(ddt_entry_t *dde);
241 +extern void dde_exit(ddt_entry_t *dde);
222 242 extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
223 243 extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
224 244 extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
225 245
226 246 extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
227 247 const blkptr_t *bp);
228 248
229 249 extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
230 250 extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
231 251
232 252 extern int ddt_entry_compare(const void *x1, const void *x2);
233 253
234 254 extern void ddt_create(spa_t *spa);
235 255 extern int ddt_load(spa_t *spa);
236 256 extern void ddt_unload(spa_t *spa);
237 257 extern void ddt_sync(spa_t *spa, uint64_t txg);
238 258 extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
239 259 extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
240 260 enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx);
261 +extern void ddt_init(void);
262 +extern void ddt_fini(void);
241 263
242 264 extern const ddt_ops_t ddt_zap_ops;
243 265
244 266 #ifdef __cplusplus
245 267 }
246 268 #endif
247 269
248 270 #endif /* _SYS_DDT_H */
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX