1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
25 */
26
27 #ifndef _SYS_DDT_H
28 #define _SYS_DDT_H
29
30 #include <sys/sysmacros.h>
31 #include <sys/types.h>
32 #include <sys/fs/zfs.h>
33 #include <sys/zio.h>
34 #include <sys/dmu.h>
35
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39
40 struct abd;
41
/*
 * On-disk DDT formats, listed in the order in which they should be
 * searched (newest version first).
 *
 * DDT_TYPES is not a format: it is the number of formats, and it sizes
 * the per-type arrays in struct ddt.  It must remain the last enumerator.
 */
enum ddt_type {
	DDT_TYPE_ZAP = 0,
	DDT_TYPES		/* number of formats; keep last */
};
49
/*
 * DDT classes, listed in the order in which they should be searched
 * (highest replication level first).
 *
 * DDT_CLASSES is the number of classes and sizes the per-class arrays in
 * struct ddt.  It must remain the last enumerator.
 */
enum ddt_class {
	DDT_CLASS_DITTO		= 0,
	DDT_CLASS_DUPLICATE	= 1,
	DDT_CLASS_UNIQUE	= 2,
	DDT_CLASSES		/* number of classes; keep last */
};
59
/*
 * State bits for a DDT entry (DDE).  They record whether a DDE is still
 * being loaded or has already been loaded, and which entries were removed
 * from the dedup path in order to support the dedup ceiling.
 *
 * Every value is a distinct single bit, so several can be held in one
 * mask at the same time.
 * NOTE(review): presumably these are the bits kept in the uint8_t
 * dde_state member of struct ddt_entry -- confirm against ddt.c.
 */
enum dde_state {
	DDE_LOADING	= 0x01,
	DDE_LOADED	= 0x02,
	DDE_NEW		= 0x04,
	DDE_DONT_SYNC	= 0x08
};
70
/*
 * DDT type selected as "current".  The value 0 is numerically equal to
 * DDT_TYPE_ZAP in enum ddt_type.
 */
#define	DDT_TYPE_CURRENT		0

/*
 * Two non-overlapping masks that together cover one byte: the top bit
 * and the low seven bits.
 * NOTE(review): presumably they split the header byte written by
 * ddt_compress() into a byte-order flag and a compression function
 * number -- confirm against ddt.c.
 */
#define	DDT_COMPRESS_BYTEORDER_MASK	0x80
#define	DDT_COMPRESS_FUNCTION_MASK	0x7f
75
76 /*
77 * On-disk ddt entry: key (name) and physical storage (value).
78 */
79 typedef struct ddt_key {
80 zio_cksum_t ddk_cksum; /* 256-bit block checksum */
81 /*
82 * Encoded with logical & physical size, and compression, as follows:
83 * +-------+-------+-------+-------+-------+-------+-------+-------+
84 * | 0 | 0 | 0 | comp | PSIZE | LSIZE |
85 * +-------+-------+-------+-------+-------+-------+-------+-------+
86 */
87 uint64_t ddk_prop;
88 } ddt_key_t;
89
/*
 * Accessors for the fields packed into ddt_key_t.ddk_prop.  'ddk' is a
 * pointer to a ddt_key_t.
 *
 *	field		first bit	width
 *	LSIZE		0		16
 *	PSIZE		16		16
 *	compress	32		8
 *
 * The two size fields go through BF64_GET_SB/BF64_SET_SB with
 * SPA_MINBLOCKSHIFT and 1 as the trailing arguments; see the definition
 * of those macros for the exact shift-and-bias encoding they apply.
 */
#define	DDK_GET_LSIZE(ddk)						\
	BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_LSIZE(ddk, x)						\
	BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	DDK_GET_PSIZE(ddk)						\
	BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_PSIZE(ddk, x)						\
	BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	DDK_GET_COMPRESS(ddk)						\
	BF64_GET((ddk)->ddk_prop, 32, 8)
#define	DDK_SET_COMPRESS(ddk, x)					\
	BF64_SET((ddk)->ddk_prop, 32, 8, x)

/* Size of a ddt_key_t expressed as a count of 64-bit words. */
#define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))
104
105 typedef struct ddt_phys {
106 dva_t ddp_dva[SPA_DVAS_PER_BP];
107 uint64_t ddp_refcnt;
108 uint64_t ddp_phys_birth;
109 } ddt_phys_t;
110
/*
 * Index of a physical variant within a DDT entry.  DDT_PHYS_TYPES is the
 * number of variants; it sizes the dde_phys[] and dde_lead_zio[] arrays
 * in struct ddt_entry and must remain the last enumerator.
 */
enum ddt_phys_type {
	DDT_PHYS_DITTO	= 0,
	DDT_PHYS_SINGLE	= 1,
	DDT_PHYS_DOUBLE	= 2,
	DDT_PHYS_TRIPLE	= 3,
	DDT_PHYS_TYPES		/* number of variants; keep last */
};
118
119 /*
120 * In-core ddt entry
121 */
122 struct ddt_entry {
123 ddt_key_t dde_key;
124 ddt_phys_t dde_phys[DDT_PHYS_TYPES];
125 zio_t *dde_lead_zio[DDT_PHYS_TYPES];
126 ddt_stat_t dde_lkstat;
127 struct abd *dde_repair_abd;
128 enum ddt_type dde_type;
129 enum ddt_class dde_class;
130 uint8_t dde_state;
131 kcondvar_t dde_cv;
132 kmutex_t dde_lock;
133 avl_node_t dde_node;
134 };
135
/*
 * Number of hash buckets, and the function that selects one.
 *
 * DDT_HASHFN() yields the byte stored at the lowest address of
 * (csum).zc_word[0], masked to the range [0, DDT_HASHSZ).  It reads raw
 * memory rather than the word's numeric value, so which byte of the word
 * is selected depends on the host byte order.  'csum' must be an lvalue
 * because its address is taken.
 */
#define	DDT_HASHSZ		0x100
#define	DDT_HASHFN(csum)						\
	(((const uint8_t *)&(csum).zc_word[0])[0] & (DDT_HASHSZ - 1))
139
140 /*
141 * In-core ddt
142 */
143 struct ddt {
144 kmutex_t ddt_lock[DDT_HASHSZ];
145 avl_tree_t ddt_tree[DDT_HASHSZ];
146 kmutex_t ddt_repair_lock;
147 avl_tree_t ddt_repair_tree;
148 enum zio_checksum ddt_checksum;
149 spa_t *ddt_spa;
150 objset_t *ddt_os;
151 uint64_t ddt_stat_object;
152 uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
153 ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
154 ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
155 ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
156 avl_node_t ddt_node;
157 };
158
/*
 * Bookmark for DDT walks, used both in core and on disk.  Because it is
 * also an on-disk format, every member is a fixed-width 64-bit integer
 * and the member order must not change.
 */
typedef struct ddt_bookmark {
	uint64_t	ddb_class;
	uint64_t	ddb_type;
	uint64_t	ddb_checksum;
	uint64_t	ddb_cursor;
} ddt_bookmark_t;
168
169 /*
170 * Ops vector to access a specific DDT object type.
171 */
172 typedef struct ddt_ops {
173 char ddt_op_name[32];
174 int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
175 boolean_t prehash);
176 int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
177 int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
178 void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
179 ddt_entry_t *dde);
180 int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
181 dmu_tx_t *tx);
182 int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
183 dmu_tx_t *tx);
184 int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
185 uint64_t *walk);
186 int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
187 } ddt_ops_t;
188
189 #define DDT_NAMELEN 80
190
191 extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
192 enum ddt_class class, char *name);
193 extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
194 enum ddt_class class, uint64_t *walk, ddt_entry_t *dde);
195 extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
196 enum ddt_class class, uint64_t *count);
197 extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
198 enum ddt_class class, dmu_object_info_t *);
199 extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
200 enum ddt_class class);
201
202 extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
203 uint64_t txg);
204 extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
205 const ddt_phys_t *ddp, blkptr_t *bp);
206
207 extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
208
209 extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
210 extern void ddt_phys_clear(ddt_phys_t *ddp);
211 extern void ddt_phys_addref(ddt_phys_t *ddp);
212 extern void ddt_phys_decref(ddt_phys_t *ddp);
213 extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
214 uint64_t txg);
215 extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
216 extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
217
218 extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
219
220 extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
221 extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
222 extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
223 extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
224 extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
225 extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
226
227 extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
228 extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
229
230 extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
231 ddt_phys_t *ddp_willref);
232 extern int ddt_ditto_copies_present(ddt_entry_t *dde);
233
234 extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
235 extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
236
237 extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
238 extern void ddt_enter(ddt_t *ddt, uint8_t hash);
239 extern void ddt_exit(ddt_t *ddt, uint8_t hash);
240 extern void dde_enter(ddt_entry_t *dde);
241 extern void dde_exit(ddt_entry_t *dde);
242 extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
243 extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
244 extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
245
246 extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
247 const blkptr_t *bp);
248
249 extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
250 extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
251
252 extern int ddt_entry_compare(const void *x1, const void *x2);
253
254 extern void ddt_create(spa_t *spa);
255 extern int ddt_load(spa_t *spa);
256 extern void ddt_unload(spa_t *spa);
257 extern void ddt_sync(spa_t *spa, uint64_t txg);
258 extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
259 extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
260 enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx);
261 extern void ddt_init(void);
262 extern void ddt_fini(void);
263
264 extern const ddt_ops_t ddt_zap_ops;
265
266 #ifdef __cplusplus
267 }
268 #endif
269
270 #endif /* _SYS_DDT_H */