--- old/usr/src/uts/common/fs/zfs/dmu_send.c
+++ new/usr/src/uts/common/fs/zfs/dmu_send.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 + * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
24 24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 26 */
27 27
28 28 #include <sys/dmu.h>
29 29 #include <sys/dmu_impl.h>
30 30 #include <sys/dmu_tx.h>
31 31 #include <sys/dbuf.h>
32 32 #include <sys/dnode.h>
33 33 #include <sys/zfs_context.h>
34 34 #include <sys/dmu_objset.h>
35 35 #include <sys/dmu_traverse.h>
36 36 #include <sys/dsl_dataset.h>
37 37 #include <sys/dsl_dir.h>
38 38 #include <sys/dsl_prop.h>
39 39 #include <sys/dsl_pool.h>
40 40 #include <sys/dsl_synctask.h>
41 41 #include <sys/zfs_ioctl.h>
42 42 #include <sys/zap.h>
43 43 #include <sys/zio_checksum.h>
44 44 #include <sys/zfs_znode.h>
45 45 #include <zfs_fletcher.h>
46 46 #include <sys/avl.h>
47 47 #include <sys/ddt.h>
48 48 #include <sys/zfs_onexit.h>
49 49
50 50 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
51 51 int zfs_send_corrupt_data = B_FALSE;
52 52
53 53 static char *dmu_recv_tag = "dmu_recv_tag";
54 54
55 55 static int
56 56 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
57 57 {
58 58 dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
59 59 ssize_t resid; /* have to get resid to get detailed errno */
60 60 ASSERT3U(len % 8, ==, 0);
61 61
62 - fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
63 - dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
64 - (caddr_t)buf, len,
65 - 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
66 -
62 + dsp->dsa_err = 0;
63 + if (!dsp->sendsize) {
64 + fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
65 + dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
66 + (caddr_t)buf, len,
67 + 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY,
68 + CRED(), &resid);
69 + }
67 70 mutex_enter(&ds->ds_sendstream_lock);
68 71 *dsp->dsa_off += len;
69 72 mutex_exit(&ds->ds_sendstream_lock);
70 73
71 74 return (dsp->dsa_err);
72 75 }
73 76
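Note for reviewers: the new sendsize flag turns dump_bytes into a pure offset counter; the checksum and vn_rdwr are skipped, but *dsa_off still advances so the caller can read the total stream size back out of the offset. A self-contained sketch of that pattern in userland C (struct sendarg and emit() are invented for illustration; only the control flow mirrors dump_bytes):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* Toy stand-in for dmu_sendarg_t: either write records or just count. */
    struct sendarg {
            int             sizeonly;       /* analogous to dsp->sendsize */
            uint64_t        off;            /* analogous to *dsp->dsa_off */
            FILE            *out;           /* analogous to dsp->dsa_vp */
    };

    static int
    emit(struct sendarg *sa, const void *buf, size_t len)
    {
            if (!sa->sizeonly) {
                    /* real mode: the checksum and write would happen here */
                    if (fwrite(buf, 1, len, sa->out) != len)
                            return (-1);
            }
            sa->off += len;         /* offset advances in both modes */
            return (0);
    }

    int
    main(void)
    {
            struct sendarg sa = { .sizeonly = 1 };
            char rec[312];          /* arbitrary record size for the demo */

            memset(rec, 0, sizeof (rec));
            (void) emit(&sa, rec, sizeof (rec));
            (void) emit(&sa, rec, sizeof (rec));
            printf("estimated stream size: %llu bytes\n",
                (unsigned long long)sa.off);
            return (0);
    }
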
74 77 static int
75 78 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
76 79 uint64_t length)
77 80 {
78 81 struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
79 82
80 83 if (length != -1ULL && offset + length < offset)
81 84 length = -1ULL;
82 85
83 86 /*
84 87 * If there is a pending op, but it's not PENDING_FREE, push it out,
85 88 * since free block aggregation can only be done for blocks of the
86 89 * same type (i.e., DRR_FREE records can only be aggregated with
87 90 * other DRR_FREE records. DRR_FREEOBJECTS records can only be
  88  91  	 * aggregated with other DRR_FREEOBJECTS records.)
89 92 */
90 93 if (dsp->dsa_pending_op != PENDING_NONE &&
91 94 dsp->dsa_pending_op != PENDING_FREE) {
92 95 if (dump_bytes(dsp, dsp->dsa_drr,
93 96 sizeof (dmu_replay_record_t)) != 0)
94 97 return (EINTR);
95 98 dsp->dsa_pending_op = PENDING_NONE;
96 99 }
97 100
98 101 if (dsp->dsa_pending_op == PENDING_FREE) {
99 102 /*
100 103 * There should never be a PENDING_FREE if length is -1
101 104 * (because dump_dnode is the only place where this
102 105 * function is called with a -1, and only after flushing
103 106 * any pending record).
104 107 */
105 108 ASSERT(length != -1ULL);
106 109 /*
107 110 * Check to see whether this free block can be aggregated
108 111 * with pending one.
109 112 */
110 113 if (drrf->drr_object == object && drrf->drr_offset +
111 114 drrf->drr_length == offset) {
112 115 drrf->drr_length += length;
113 116 return (0);
114 117 } else {
115 118 /* not a continuation. Push out pending record */
116 119 if (dump_bytes(dsp, dsp->dsa_drr,
117 120 sizeof (dmu_replay_record_t)) != 0)
118 121 return (EINTR);
119 122 dsp->dsa_pending_op = PENDING_NONE;
120 123 }
121 124 }
122 125 /* create a FREE record and make it pending */
123 126 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
124 127 dsp->dsa_drr->drr_type = DRR_FREE;
125 128 drrf->drr_object = object;
126 129 drrf->drr_offset = offset;
127 130 drrf->drr_length = length;
128 131 drrf->drr_toguid = dsp->dsa_toguid;
129 132 if (length == -1ULL) {
130 133 if (dump_bytes(dsp, dsp->dsa_drr,
131 134 sizeof (dmu_replay_record_t)) != 0)
132 135 return (EINTR);
133 136 } else {
134 137 dsp->dsa_pending_op = PENDING_FREE;
135 138 }
136 139
137 140 return (0);
138 141 }
139 142
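dump_free coalesces runs of adjacent frees into a single DRR_FREE record, flushing the pending record only when the next free is not a continuation. A minimal stand-alone illustration of the merge test (struct extent and try_merge() are invented names):

    #include <stdio.h>
    #include <stdint.h>

    /* Pending-extent merge, as dump_free does for DRR_FREE records. */
    struct extent { uint64_t object, offset, length; int pending; };

    /* Returns 1 if [offset, offset+length) extended the pending extent. */
    static int
    try_merge(struct extent *p, uint64_t object, uint64_t offset,
        uint64_t length)
    {
            if (p->pending && p->object == object &&
                p->offset + p->length == offset) {
                    p->length += length;
                    return (1);
            }
            return (0);
    }

    int
    main(void)
    {
            struct extent p = { .object = 5, .offset = 0, .length = 4096,
                .pending = 1 };

            /* contiguous: 0..4096 then 4096..8192 collapse to one record */
            printf("merge contiguous: %d (length now %llu)\n",
                try_merge(&p, 5, 4096, 4096), (unsigned long long)p.length);
            /* a gap forces the pending record out, as in the else branch */
            printf("merge with gap:   %d\n", try_merge(&p, 5, 16384, 4096));
            return (0);
    }
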
140 143 static int
141 144 dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
142 145 uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
143 146 {
144 147 struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
145 148
146 149
147 150 /*
148 151 * If there is any kind of pending aggregation (currently either
149 152 * a grouping of free objects or free blocks), push it out to
150 153 * the stream, since aggregation can't be done across operations
151 154 * of different types.
152 155 */
153 156 if (dsp->dsa_pending_op != PENDING_NONE) {
154 157 if (dump_bytes(dsp, dsp->dsa_drr,
155 158 sizeof (dmu_replay_record_t)) != 0)
156 159 return (EINTR);
157 160 dsp->dsa_pending_op = PENDING_NONE;
158 161 }
159 162 /* write a DATA record */
160 163 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
161 164 dsp->dsa_drr->drr_type = DRR_WRITE;
162 165 drrw->drr_object = object;
163 166 drrw->drr_type = type;
164 167 drrw->drr_offset = offset;
165 168 drrw->drr_length = blksz;
166 169 drrw->drr_toguid = dsp->dsa_toguid;
167 170 drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
168 171 if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
169 172 drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
170 173 DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
171 174 DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
172 175 DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
173 176 drrw->drr_key.ddk_cksum = bp->blk_cksum;
174 177
175 178 if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
176 179 return (EINTR);
177 180 if (dump_bytes(dsp, data, blksz) != 0)
178 181 return (EINTR);
179 182 return (0);
180 183 }
181 184
182 185 static int
183 186 dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
184 187 {
185 188 struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
186 189
187 190 if (dsp->dsa_pending_op != PENDING_NONE) {
188 191 if (dump_bytes(dsp, dsp->dsa_drr,
189 192 sizeof (dmu_replay_record_t)) != 0)
190 193 return (EINTR);
191 194 dsp->dsa_pending_op = PENDING_NONE;
192 195 }
193 196
194 197 /* write a SPILL record */
195 198 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
196 199 dsp->dsa_drr->drr_type = DRR_SPILL;
197 200 drrs->drr_object = object;
198 201 drrs->drr_length = blksz;
199 202 drrs->drr_toguid = dsp->dsa_toguid;
200 203
201 204 if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
202 205 return (EINTR);
203 206 if (dump_bytes(dsp, data, blksz))
204 207 return (EINTR);
205 208 return (0);
206 209 }
207 210
208 211 static int
209 212 dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
210 213 {
211 214 struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
212 215
213 216 /*
214 217 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
215 218 * push it out, since free block aggregation can only be done for
216 219 * blocks of the same type (i.e., DRR_FREE records can only be
217 220 * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records
 218 221  	 * can only be aggregated with other DRR_FREEOBJECTS records.)
219 222 */
220 223 if (dsp->dsa_pending_op != PENDING_NONE &&
221 224 dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
222 225 if (dump_bytes(dsp, dsp->dsa_drr,
223 226 sizeof (dmu_replay_record_t)) != 0)
224 227 return (EINTR);
225 228 dsp->dsa_pending_op = PENDING_NONE;
226 229 }
227 230 if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
228 231 /*
229 232 * See whether this free object array can be aggregated
 230 233  	 * with the pending one
231 234 */
232 235 if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
233 236 drrfo->drr_numobjs += numobjs;
234 237 return (0);
235 238 } else {
236 239 /* can't be aggregated. Push out pending record */
237 240 if (dump_bytes(dsp, dsp->dsa_drr,
238 241 sizeof (dmu_replay_record_t)) != 0)
239 242 return (EINTR);
240 243 dsp->dsa_pending_op = PENDING_NONE;
241 244 }
242 245 }
243 246
244 247 /* write a FREEOBJECTS record */
245 248 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
246 249 dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
247 250 drrfo->drr_firstobj = firstobj;
248 251 drrfo->drr_numobjs = numobjs;
249 252 drrfo->drr_toguid = dsp->dsa_toguid;
250 253
251 254 dsp->dsa_pending_op = PENDING_FREEOBJECTS;
252 255
253 256 return (0);
254 257 }
255 258
256 259 static int
257 260 dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
258 261 {
259 262 struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
260 263
261 264 if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
262 265 return (dump_freeobjects(dsp, object, 1));
263 266
264 267 if (dsp->dsa_pending_op != PENDING_NONE) {
265 268 if (dump_bytes(dsp, dsp->dsa_drr,
266 269 sizeof (dmu_replay_record_t)) != 0)
267 270 return (EINTR);
268 271 dsp->dsa_pending_op = PENDING_NONE;
269 272 }
270 273
271 274 /* write an OBJECT record */
272 275 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
273 276 dsp->dsa_drr->drr_type = DRR_OBJECT;
274 277 drro->drr_object = object;
275 278 drro->drr_type = dnp->dn_type;
276 279 drro->drr_bonustype = dnp->dn_bonustype;
277 280 drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
278 281 drro->drr_bonuslen = dnp->dn_bonuslen;
279 282 drro->drr_checksumtype = dnp->dn_checksum;
280 283 drro->drr_compress = dnp->dn_compress;
281 284 drro->drr_toguid = dsp->dsa_toguid;
282 285
283 286 if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
284 287 return (EINTR);
285 288
286 289 if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
287 290 return (EINTR);
288 291
289 292 /* free anything past the end of the file */
290 293 if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
291 294 (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
292 295 return (EINTR);
293 296 if (dsp->dsa_err)
294 297 return (EINTR);
295 298 return (0);
296 299 }
297 300
298 301 #define BP_SPAN(dnp, level) \
299 302 (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
300 303 (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
301 304
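BP_SPAN(dnp, level) is the number of bytes of file data covered by one block pointer at the given indirect level. A worked example, assuming 128K data blocks (dn_datablkszsec = 256) and 16K indirect blocks (dn_indblkshift = 14, so each indirect block holds 2^(14-7) = 128 blkptrs):

    #include <stdio.h>
    #include <stdint.h>

    #define SPA_MINBLOCKSHIFT       9       /* 512-byte sectors */
    #define SPA_BLKPTRSHIFT         7       /* sizeof (blkptr_t) == 128 */

    int
    main(void)
    {
            uint64_t datablkszsec = 256;    /* 128K data blocks */
            int indblkshift = 14;           /* 16K indirect blocks */

            for (int level = 0; level <= 2; level++) {
                    uint64_t span = datablkszsec <<
                        (SPA_MINBLOCKSHIFT +
                        level * (indblkshift - SPA_BLKPTRSHIFT));
                    printf("level %d: span = %llu bytes\n", level,
                        (unsigned long long)span);
            }
            return (0);
    }
    /* level 0: 131072 (128K), level 1: 16777216 (16M),
       level 2: 2147483648 (2G) */
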
302 305 /* ARGSUSED */
303 306 static int
304 307 backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
305 308 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
306 309 {
307 310 dmu_sendarg_t *dsp = arg;
308 311 dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
309 312 int err = 0;
310 313
311 314 if (issig(JUSTLOOKING) && issig(FORREAL))
312 315 return (EINTR);
313 316
314 317 if (zb->zb_object != DMU_META_DNODE_OBJECT &&
315 318 DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
316 319 return (0);
317 320 } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
318 321 uint64_t span = BP_SPAN(dnp, zb->zb_level);
319 322 uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
320 323 err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
321 324 } else if (bp == NULL) {
322 325 uint64_t span = BP_SPAN(dnp, zb->zb_level);
323 326 err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
324 327 } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
325 328 return (0);
326 329 } else if (type == DMU_OT_DNODE) {
327 330 dnode_phys_t *blk;
328 331 int i;
329 332 int blksz = BP_GET_LSIZE(bp);
330 333 uint32_t aflags = ARC_WAIT;
331 334 arc_buf_t *abuf;
332 335
333 336 if (dsl_read(NULL, spa, bp, pbuf,
334 337 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
335 338 ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
336 339 return (EIO);
337 340
338 341 blk = abuf->b_data;
339 342 for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
340 343 uint64_t dnobj = (zb->zb_blkid <<
341 344 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
342 345 err = dump_dnode(dsp, dnobj, blk+i);
343 346 if (err)
344 347 break;
345 348 }
346 349 (void) arc_buf_remove_ref(abuf, &abuf);
347 350 } else if (type == DMU_OT_SA) {
348 351 uint32_t aflags = ARC_WAIT;
349 352 arc_buf_t *abuf;
350 353 int blksz = BP_GET_LSIZE(bp);
351 354
352 355 if (arc_read_nolock(NULL, spa, bp,
353 356 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
354 357 ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
355 358 return (EIO);
356 359
357 360 err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
358 361 (void) arc_buf_remove_ref(abuf, &abuf);
359 362 } else { /* it's a level-0 block of a regular object */
360 363 uint32_t aflags = ARC_WAIT;
361 - arc_buf_t *abuf;
364 + arc_buf_t *abuf = NULL;
365 + void *buf = NULL;
362 366 int blksz = BP_GET_LSIZE(bp);
363 367
364 - if (dsl_read(NULL, spa, bp, pbuf,
365 - arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
366 - ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
367 - if (zfs_send_corrupt_data) {
368 + if (!dsp->sendsize) {
369 + if (dsl_read(NULL, spa, bp, pbuf,
370 + arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
371 + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
372 + if (zfs_send_corrupt_data) {
368 373 /* Send a block filled with 0x"zfs badd bloc" */
369 - abuf = arc_buf_alloc(spa, blksz, &abuf,
370 - ARC_BUFC_DATA);
371 - uint64_t *ptr;
372 - for (ptr = abuf->b_data;
373 - (char *)ptr < (char *)abuf->b_data + blksz;
374 - ptr++)
375 - *ptr = 0x2f5baddb10c;
376 - } else {
377 - return (EIO);
374 + abuf = arc_buf_alloc(spa, blksz, &abuf,
375 + ARC_BUFC_DATA);
376 + uint64_t *ptr;
377 + for (ptr = abuf->b_data;
378 + (char *)ptr <
379 + (char *)abuf->b_data + blksz;
380 + ptr++)
381 + *ptr = 0x2f5baddb10c;
382 + } else {
383 + return (EIO);
384 + }
378 385 }
386 + buf = abuf->b_data;
379 387 }
380 388
381 389 err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
382 - blksz, bp, abuf->b_data);
383 - (void) arc_buf_remove_ref(abuf, &abuf);
390 + blksz, bp, buf);
391 + if (!dsp->sendsize) {
392 + (void) arc_buf_remove_ref(abuf, &abuf);
393 + }
384 394 }
385 395
386 396 ASSERT(err == 0 || err == EINTR);
387 397 return (err);
388 398 }
389 399
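When zfs_send_corrupt_data is set, an unreadable level-0 block is replaced by a buffer of repeating 0x2f5baddb10c (the "zfs badd bloc" pattern named in the tunable's comment) rather than aborting the send. The fill loop in isolation:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>

    int
    main(void)
    {
            int blksz = 4096;
            uint64_t *buf = malloc(blksz);
            uint64_t *ptr;

            /* same fill loop as backup_cb's zfs_send_corrupt_data path */
            for (ptr = buf; (char *)ptr < (char *)buf + blksz; ptr++)
                    *ptr = 0x2f5baddb10cULL;

            printf("first word: 0x%llx\n", (unsigned long long)buf[0]);
            free(buf);
            return (0);
    }

Note also that on the new sendsize path abuf and buf stay NULL all the way into dump_data; that is safe only because dump_bytes never dereferences its buffer when dsp->sendsize is set.
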
390 400 /*
391 401 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
392 402 * For example, they could both be snapshots of the same filesystem, and
393 403 * 'earlier' is before 'later'. Or 'earlier' could be the origin of
394 404 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
395 405 * filesystem. Or 'earlier' could be the origin's origin.
396 406 */
397 407 static boolean_t
398 408 is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
399 409 {
400 410 dsl_pool_t *dp = later->ds_dir->dd_pool;
401 411 int error;
402 412 boolean_t ret;
403 413 dsl_dataset_t *origin;
404 414
405 415 if (earlier->ds_phys->ds_creation_txg >=
406 416 later->ds_phys->ds_creation_txg)
407 417 return (B_FALSE);
408 418
409 419 if (later->ds_dir == earlier->ds_dir)
410 420 return (B_TRUE);
411 421 if (!dsl_dir_is_clone(later->ds_dir))
412 422 return (B_FALSE);
413 423
414 424 rw_enter(&dp->dp_config_rwlock, RW_READER);
415 425 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) {
416 426 rw_exit(&dp->dp_config_rwlock);
417 427 return (B_TRUE);
418 428 }
419 429 error = dsl_dataset_hold_obj(dp,
420 430 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
421 431 rw_exit(&dp->dp_config_rwlock);
422 432 if (error != 0)
423 433 return (B_FALSE);
424 434 ret = is_before(origin, earlier);
425 435 dsl_dataset_rele(origin, FTAG);
426 436 return (ret);
427 437 }
428 438
439 +
429 440 int
430 441 dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
431 - offset_t *off)
442 + offset_t *off, boolean_t sendsize)
432 443 {
433 444 dsl_dataset_t *ds = tosnap->os_dsl_dataset;
434 445 dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
435 446 dmu_replay_record_t *drr;
436 447 dmu_sendarg_t *dsp;
437 448 int err;
438 449 uint64_t fromtxg = 0;
439 450
440 451 /* tosnap must be a snapshot */
441 452 if (ds->ds_phys->ds_next_snap_obj == 0)
442 453 return (EINVAL);
443 454
444 455 /*
445 456 * fromsnap must be an earlier snapshot from the same fs as tosnap,
446 457 * or the origin's fs.
447 458 */
448 459 if (fromds != NULL && !is_before(ds, fromds))
449 460 return (EXDEV);
450 461
451 462 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
452 463 drr->drr_type = DRR_BEGIN;
453 464 drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
454 465 DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
455 466 DMU_SUBSTREAM);
456 467
457 468 #ifdef _KERNEL
458 469 if (dmu_objset_type(tosnap) == DMU_OST_ZFS) {
459 470 uint64_t version;
460 471 if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) {
461 472 kmem_free(drr, sizeof (dmu_replay_record_t));
462 473 return (EINVAL);
463 474 }
464 475 if (version == ZPL_VERSION_SA) {
465 476 DMU_SET_FEATUREFLAGS(
466 477 drr->drr_u.drr_begin.drr_versioninfo,
467 478 DMU_BACKUP_FEATURE_SA_SPILL);
468 479 }
469 480 }
470 481 #endif
471 482
472 483 drr->drr_u.drr_begin.drr_creation_time =
473 484 ds->ds_phys->ds_creation_time;
474 485 drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type;
475 486 if (fromds != NULL && ds->ds_dir != fromds->ds_dir)
476 487 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
477 488 drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
478 489 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
479 490 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
480 491
481 492 if (fromds)
482 493 drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
483 494 dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
484 495
485 496 if (fromds)
486 497 fromtxg = fromds->ds_phys->ds_creation_txg;
487 498
488 499 dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
489 500
490 501 dsp->dsa_drr = drr;
491 502 dsp->dsa_vp = vp;
492 503 dsp->dsa_outfd = outfd;
493 504 dsp->dsa_proc = curproc;
494 505 dsp->dsa_os = tosnap;
495 506 dsp->dsa_off = off;
496 507 dsp->dsa_toguid = ds->ds_phys->ds_guid;
497 508 ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
498 509 dsp->dsa_pending_op = PENDING_NONE;
510 + dsp->sendsize = sendsize;
499 511
500 512 mutex_enter(&ds->ds_sendstream_lock);
501 513 list_insert_head(&ds->ds_sendstreams, dsp);
502 514 mutex_exit(&ds->ds_sendstream_lock);
503 515
504 516 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
505 517 err = dsp->dsa_err;
506 518 goto out;
507 519 }
508 520
509 - err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
510 - backup_cb, dsp);
521 + if (dsp->sendsize) {
522 + err = traverse_dataset(ds, fromtxg,
523 + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
524 + backup_cb, dsp);
525 + } else {
526 + err = traverse_dataset(ds,
527 + fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
528 + backup_cb, dsp);
529 + }
511 530
512 531 if (dsp->dsa_pending_op != PENDING_NONE)
513 532 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
514 533 err = EINTR;
515 534
516 535 if (err) {
517 536 if (err == EINTR && dsp->dsa_err)
518 537 err = dsp->dsa_err;
519 538 goto out;
520 539 }
521 540
522 541 bzero(drr, sizeof (dmu_replay_record_t));
523 542 drr->drr_type = DRR_END;
524 543 drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
525 544 drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
526 545
527 546 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
528 547 err = dsp->dsa_err;
529 548 goto out;
530 549 }
531 550
532 551 out:
533 552 mutex_enter(&ds->ds_sendstream_lock);
534 553 list_remove(&ds->ds_sendstreams, dsp);
535 554 mutex_exit(&ds->ds_sendstream_lock);
536 555
537 556 kmem_free(drr, sizeof (dmu_replay_record_t));
538 557 kmem_free(dsp, sizeof (dmu_sendarg_t));
539 558
540 559 return (err);
541 560 }
542 561
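A hypothetical caller of the new interface (illustrative only; not part of this diff). A size-only send passes sendsize = B_TRUE and reads the total back through *off; since dump_bytes skips vn_rdwr in that mode, the vnode is never touched, and the traversal above prefetches only metadata (TRAVERSE_PREFETCH_METADATA) rather than data blocks:

    /*
     * Hypothetical: estimate the stream size by running a size-only send.
     * No vnode I/O happens when sendsize is B_TRUE; the accumulated
     * offset is the estimate.
     */
    static int
    send_size_only(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)
    {
            offset_t off = 0;
            int err;

            /* outfd and vp are unused on the size-only path */
            err = dmu_send(tosnap, fromsnap, -1, NULL, &off, B_TRUE);
            if (err == 0)
                    *sizep = off;
            return (err);
    }
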
543 562 int
544 563 dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)
545 564 {
546 565 dsl_dataset_t *ds = tosnap->os_dsl_dataset;
547 566 dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
548 567 dsl_pool_t *dp = ds->ds_dir->dd_pool;
549 568 int err;
550 569 uint64_t size;
551 570
552 571 /* tosnap must be a snapshot */
553 572 if (ds->ds_phys->ds_next_snap_obj == 0)
554 573 return (EINVAL);
555 574
556 575 /*
557 576 * fromsnap must be an earlier snapshot from the same fs as tosnap,
558 577 * or the origin's fs.
559 578 */
560 579 if (fromds != NULL && !is_before(ds, fromds))
561 580 return (EXDEV);
562 581
563 582 /* Get uncompressed size estimate of changed data. */
564 583 if (fromds == NULL) {
565 584 size = ds->ds_phys->ds_uncompressed_bytes;
566 585 } else {
567 586 uint64_t used, comp;
568 587 err = dsl_dataset_space_written(fromds, ds,
569 588 &used, &comp, &size);
570 589 if (err)
571 590 return (err);
572 591 }
573 592
574 593 /*
575 594 * Assume that space (both on-disk and in-stream) is dominated by
576 595 * data. We will adjust for indirect blocks and the copies property,
577 596 * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
578 597 */
579 598
580 599 /*
581 600 * Subtract out approximate space used by indirect blocks.
582 601 * Assume most space is used by data blocks (non-indirect, non-dnode).
583 602 * Assume all blocks are recordsize. Assume ditto blocks and
 584 603  	 * internal fragmentation cancel out compression.
585 604 *
586 605 * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
587 606 * block, which we observe in practice.
588 607 */
589 608 uint64_t recordsize;
590 609 rw_enter(&dp->dp_config_rwlock, RW_READER);
591 610 err = dsl_prop_get_ds(ds, "recordsize",
592 611 sizeof (recordsize), 1, &recordsize, NULL);
593 612 rw_exit(&dp->dp_config_rwlock);
594 613 if (err)
595 614 return (err);
596 615 size -= size / recordsize * sizeof (blkptr_t);
597 616
598 617 /* Add in the space for the record associated with each block. */
599 618 size += size / recordsize * sizeof (dmu_replay_record_t);
600 619
601 620 *sizep = size;
602 621
603 622 return (0);
604 623 }
605 624
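The two adjustments in dmu_send_estimate nearly cancel: indirect-block space (one 128-byte blkptr_t per block) is subtracted, then one replay-record header per block is added back. Worked numbers for 1 GiB of changed data at recordsize = 128K (312 for sizeof (dmu_replay_record_t) is an assumption about the current struct layout):

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
            uint64_t size = 1ULL << 30;             /* 1 GiB of changed data */
            uint64_t recordsize = 128 * 1024;       /* dataset recordsize */
            uint64_t bp = 128;                      /* sizeof (blkptr_t) */
            uint64_t drr = 312;     /* assumed sizeof (dmu_replay_record_t) */

            size -= size / recordsize * bp;  /* drop indirect-block space */
            size += size / recordsize * drr; /* add per-block DRR_WRITE hdrs */
            printf("estimate: %llu bytes\n", (unsigned long long)size);
            return (0);
    }
    /* prints 1075246656: the 1 GiB payload plus ~1.5 MB net overhead */
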
606 625 struct recvbeginsyncarg {
607 626 const char *tofs;
608 627 const char *tosnap;
609 628 dsl_dataset_t *origin;
610 629 uint64_t fromguid;
611 630 dmu_objset_type_t type;
612 631 void *tag;
613 632 boolean_t force;
614 633 uint64_t dsflags;
615 634 char clonelastname[MAXNAMELEN];
616 635 dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
617 636 cred_t *cr;
618 637 };
619 638
620 639 /* ARGSUSED */
621 640 static int
622 641 recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx)
623 642 {
624 643 dsl_dir_t *dd = arg1;
625 644 struct recvbeginsyncarg *rbsa = arg2;
626 645 objset_t *mos = dd->dd_pool->dp_meta_objset;
627 646 uint64_t val;
628 647 int err;
629 648
630 649 err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
631 650 strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val);
632 651
633 652 if (err != ENOENT)
634 653 return (err ? err : EEXIST);
635 654
636 655 if (rbsa->origin) {
637 656 /* make sure it's a snap in the same pool */
638 657 if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool)
639 658 return (EXDEV);
640 659 if (!dsl_dataset_is_snapshot(rbsa->origin))
641 660 return (EINVAL);
642 661 if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
643 662 return (ENODEV);
644 663 }
645 664
646 665 return (0);
647 666 }
648 667
649 668 static void
650 669 recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx)
651 670 {
652 671 dsl_dir_t *dd = arg1;
653 672 struct recvbeginsyncarg *rbsa = arg2;
654 673 uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
655 674 uint64_t dsobj;
656 675
657 676 /* Create and open new dataset. */
658 677 dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
659 678 rbsa->origin, flags, rbsa->cr, tx);
660 679 VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
661 680 B_TRUE, dmu_recv_tag, &rbsa->ds));
662 681
663 682 if (rbsa->origin == NULL) {
664 683 (void) dmu_objset_create_impl(dd->dd_pool->dp_spa,
665 684 rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);
666 685 }
667 686
668 687 spa_history_log_internal_ds(rbsa->ds, "receive new", tx, "");
669 688 }
670 689
671 690 /* ARGSUSED */
672 691 static int
673 692 recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
674 693 {
675 694 dsl_dataset_t *ds = arg1;
676 695 struct recvbeginsyncarg *rbsa = arg2;
677 696 int err;
678 697 uint64_t val;
679 698
680 699 /* must not have any changes since most recent snapshot */
681 700 if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds))
682 701 return (ETXTBSY);
683 702
684 703 /* new snapshot name must not exist */
685 704 err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
686 705 ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val);
687 706 if (err == 0)
688 707 return (EEXIST);
689 708 if (err != ENOENT)
690 709 return (err);
691 710
692 711 if (rbsa->fromguid) {
693 712 /* if incremental, most recent snapshot must match fromguid */
694 713 if (ds->ds_prev == NULL)
695 714 return (ENODEV);
696 715
697 716 /*
698 717 * most recent snapshot must match fromguid, or there are no
699 718 * changes since the fromguid one
700 719 */
701 720 if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) {
702 721 uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth;
703 722 uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj;
704 723 while (obj != 0) {
705 724 dsl_dataset_t *snap;
706 725 err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
707 726 obj, FTAG, &snap);
708 727 if (err)
709 728 return (ENODEV);
710 729 if (snap->ds_phys->ds_creation_txg < birth) {
711 730 dsl_dataset_rele(snap, FTAG);
712 731 return (ENODEV);
713 732 }
714 733 if (snap->ds_phys->ds_guid == rbsa->fromguid) {
715 734 dsl_dataset_rele(snap, FTAG);
716 735 break; /* it's ok */
717 736 }
718 737 obj = snap->ds_phys->ds_prev_snap_obj;
719 738 dsl_dataset_rele(snap, FTAG);
720 739 }
721 740 if (obj == 0)
722 741 return (ENODEV);
723 742 }
724 743 } else {
725 744 /* if full, most recent snapshot must be $ORIGIN */
726 745 if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL)
727 746 return (ENODEV);
728 747 }
729 748
730 749 /* temporary clone name must not exist */
731 750 err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
732 751 ds->ds_dir->dd_phys->dd_child_dir_zapobj,
733 752 rbsa->clonelastname, 8, 1, &val);
734 753 if (err == 0)
735 754 return (EEXIST);
736 755 if (err != ENOENT)
737 756 return (err);
738 757
739 758 return (0);
740 759 }
741 760
742 761 /* ARGSUSED */
743 762 static void
744 763 recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx)
745 764 {
746 765 dsl_dataset_t *ohds = arg1;
747 766 struct recvbeginsyncarg *rbsa = arg2;
748 767 dsl_pool_t *dp = ohds->ds_dir->dd_pool;
749 768 dsl_dataset_t *cds;
750 769 uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
751 770 uint64_t dsobj;
752 771
753 772 /* create and open the temporary clone */
754 773 dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
755 774 ohds->ds_prev, flags, rbsa->cr, tx);
756 775 VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds));
757 776
758 777 /*
759 778 * If we actually created a non-clone, we need to create the
760 779 * objset in our new dataset.
761 780 */
762 781 if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) {
763 782 (void) dmu_objset_create_impl(dp->dp_spa,
764 783 cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx);
765 784 }
766 785
767 786 rbsa->ds = cds;
768 787
769 788 spa_history_log_internal_ds(cds, "receive over existing", tx, "");
770 789 }
771 790
772 791 static boolean_t
773 792 dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb)
774 793 {
775 794 int featureflags;
776 795
777 796 featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
778 797
779 798 /* Verify pool version supports SA if SA_SPILL feature set */
780 799 return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
781 800 (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA));
782 801 }
783 802
784 803 /*
785 804 * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
786 805 * succeeds; otherwise we will leak the holds on the datasets.
787 806 */
788 807 int
789 808 dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb,
790 809 boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc)
791 810 {
792 811 int err = 0;
793 812 boolean_t byteswap;
794 813 struct recvbeginsyncarg rbsa = { 0 };
795 814 uint64_t versioninfo;
796 815 int flags;
797 816 dsl_dataset_t *ds;
798 817
799 818 if (drrb->drr_magic == DMU_BACKUP_MAGIC)
800 819 byteswap = FALSE;
801 820 else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
802 821 byteswap = TRUE;
803 822 else
804 823 return (EINVAL);
805 824
806 825 rbsa.tofs = tofs;
807 826 rbsa.tosnap = tosnap;
808 827 rbsa.origin = origin ? origin->os_dsl_dataset : NULL;
809 828 rbsa.fromguid = drrb->drr_fromguid;
810 829 rbsa.type = drrb->drr_type;
811 830 rbsa.tag = FTAG;
812 831 rbsa.dsflags = 0;
813 832 rbsa.cr = CRED();
814 833 versioninfo = drrb->drr_versioninfo;
815 834 flags = drrb->drr_flags;
816 835
817 836 if (byteswap) {
818 837 rbsa.type = BSWAP_32(rbsa.type);
819 838 rbsa.fromguid = BSWAP_64(rbsa.fromguid);
820 839 versioninfo = BSWAP_64(versioninfo);
821 840 flags = BSWAP_32(flags);
822 841 }
823 842
824 843 if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM ||
825 844 rbsa.type >= DMU_OST_NUMTYPES ||
826 845 ((flags & DRR_FLAG_CLONE) && origin == NULL))
827 846 return (EINVAL);
828 847
829 848 if (flags & DRR_FLAG_CI_DATA)
830 849 rbsa.dsflags = DS_FLAG_CI_DATASET;
831 850
832 851 bzero(drc, sizeof (dmu_recv_cookie_t));
833 852 drc->drc_drrb = drrb;
834 853 drc->drc_tosnap = tosnap;
835 854 drc->drc_top_ds = top_ds;
836 855 drc->drc_force = force;
837 856
838 857 /*
839 858 * Process the begin in syncing context.
840 859 */
841 860
842 861 /* open the dataset we are logically receiving into */
843 862 err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds);
844 863 if (err == 0) {
845 864 if (dmu_recv_verify_features(ds, drrb)) {
846 865 dsl_dataset_rele(ds, dmu_recv_tag);
847 866 return (ENOTSUP);
848 867 }
849 868 /* target fs already exists; recv into temp clone */
850 869
851 870 /* Can't recv a clone into an existing fs */
852 871 if (flags & DRR_FLAG_CLONE) {
853 872 dsl_dataset_rele(ds, dmu_recv_tag);
854 873 return (EINVAL);
855 874 }
856 875
857 876 /* must not have an incremental recv already in progress */
858 877 if (!mutex_tryenter(&ds->ds_recvlock)) {
859 878 dsl_dataset_rele(ds, dmu_recv_tag);
860 879 return (EBUSY);
861 880 }
862 881
 863 882  		/* tmp clone name is: tofs/%tosnap */
864 883 (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
865 884 "%%%s", tosnap);
866 885 rbsa.force = force;
867 886 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
868 887 recv_existing_check, recv_existing_sync, ds, &rbsa, 5);
869 888 if (err) {
870 889 mutex_exit(&ds->ds_recvlock);
871 890 dsl_dataset_rele(ds, dmu_recv_tag);
872 891 return (err);
873 892 }
874 893 drc->drc_logical_ds = ds;
875 894 drc->drc_real_ds = rbsa.ds;
876 895 } else if (err == ENOENT) {
877 896 /* target fs does not exist; must be a full backup or clone */
878 897 char *cp;
879 898
880 899 /*
881 900 * If it's a non-clone incremental, we are missing the
882 901 * target fs, so fail the recv.
883 902 */
884 903 if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE))
885 904 return (ENOENT);
886 905
887 906 /* Open the parent of tofs */
888 907 cp = strrchr(tofs, '/');
889 908 *cp = '\0';
890 909 err = dsl_dataset_hold(tofs, FTAG, &ds);
891 910 *cp = '/';
892 911 if (err)
893 912 return (err);
894 913
895 914 if (dmu_recv_verify_features(ds, drrb)) {
896 915 dsl_dataset_rele(ds, FTAG);
897 916 return (ENOTSUP);
898 917 }
899 918
900 919 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
901 920 recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5);
902 921 dsl_dataset_rele(ds, FTAG);
903 922 if (err)
904 923 return (err);
905 924 drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds;
906 925 drc->drc_newfs = B_TRUE;
907 926 }
908 927
909 928 return (err);
910 929 }
911 930
912 931 struct restorearg {
913 932 int err;
914 933 int byteswap;
915 934 vnode_t *vp;
916 935 char *buf;
917 936 uint64_t voff;
918 937 int bufsize; /* amount of memory allocated for buf */
919 938 zio_cksum_t cksum;
920 939 avl_tree_t *guid_to_ds_map;
921 940 };
922 941
923 942 typedef struct guid_map_entry {
924 943 uint64_t guid;
925 944 dsl_dataset_t *gme_ds;
926 945 avl_node_t avlnode;
927 946 } guid_map_entry_t;
928 947
929 948 static int
930 949 guid_compare(const void *arg1, const void *arg2)
931 950 {
932 951 const guid_map_entry_t *gmep1 = arg1;
933 952 const guid_map_entry_t *gmep2 = arg2;
934 953
935 954 if (gmep1->guid < gmep2->guid)
936 955 return (-1);
937 956 else if (gmep1->guid > gmep2->guid)
938 957 return (1);
939 958 return (0);
940 959 }
941 960
942 961 static void
943 962 free_guid_map_onexit(void *arg)
944 963 {
945 964 avl_tree_t *ca = arg;
946 965 void *cookie = NULL;
947 966 guid_map_entry_t *gmep;
948 967
949 968 while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
950 969 dsl_dataset_rele(gmep->gme_ds, ca);
951 970 kmem_free(gmep, sizeof (guid_map_entry_t));
952 971 }
953 972 avl_destroy(ca);
954 973 kmem_free(ca, sizeof (avl_tree_t));
955 974 }
956 975
957 976 static void *
958 977 restore_read(struct restorearg *ra, int len)
959 978 {
960 979 void *rv;
961 980 int done = 0;
962 981
 963 982  	/* some things will require 8-byte alignment, so everything must be */
964 983 ASSERT3U(len % 8, ==, 0);
965 984
966 985 while (done < len) {
967 986 ssize_t resid;
968 987
969 988 ra->err = vn_rdwr(UIO_READ, ra->vp,
970 989 (caddr_t)ra->buf + done, len - done,
971 990 ra->voff, UIO_SYSSPACE, FAPPEND,
972 991 RLIM64_INFINITY, CRED(), &resid);
973 992
974 993 if (resid == len - done)
975 994 ra->err = EINVAL;
976 995 ra->voff += len - done - resid;
977 996 done = len - resid;
978 997 if (ra->err)
979 998 return (NULL);
980 999 }
981 1000
982 1001 ASSERT3U(done, ==, len);
983 1002 rv = ra->buf;
984 1003 if (ra->byteswap)
985 1004 fletcher_4_incremental_byteswap(rv, len, &ra->cksum);
986 1005 else
987 1006 fletcher_4_incremental_native(rv, len, &ra->cksum);
988 1007 return (rv);
989 1008 }
990 1009
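restore_read's loop is the usual read-fully pattern: vn_rdwr may satisfy less than the requested length, so reads are reissued until the record is complete, and zero progress (resid == len - done) is treated as a truncated stream. The same shape over stdio (read_fully() is an invented name):

    #include <stdio.h>

    /*
     * Short reads are retried until the record is complete; zero
     * progress means a truncated stream (EINVAL in the kernel version).
     */
    static int
    read_fully(FILE *fp, char *buf, size_t len)
    {
            size_t done = 0;

            while (done < len) {
                    size_t n = fread(buf + done, 1, len - done, fp);
                    if (n == 0)
                            return (-1);
                    done += n;
            }
            return (0);
    }

    int
    main(void)
    {
            char hdr[8];

            if (read_fully(stdin, hdr, sizeof (hdr)) != 0)
                    fprintf(stderr, "truncated stream\n");
            return (0);
    }
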
991 1010 static void
992 1011 backup_byteswap(dmu_replay_record_t *drr)
993 1012 {
994 1013 #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
995 1014 #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
996 1015 drr->drr_type = BSWAP_32(drr->drr_type);
997 1016 drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
998 1017 switch (drr->drr_type) {
999 1018 case DRR_BEGIN:
1000 1019 DO64(drr_begin.drr_magic);
1001 1020 DO64(drr_begin.drr_versioninfo);
1002 1021 DO64(drr_begin.drr_creation_time);
1003 1022 DO32(drr_begin.drr_type);
1004 1023 DO32(drr_begin.drr_flags);
1005 1024 DO64(drr_begin.drr_toguid);
1006 1025 DO64(drr_begin.drr_fromguid);
1007 1026 break;
1008 1027 case DRR_OBJECT:
1009 1028 DO64(drr_object.drr_object);
1010 1029 /* DO64(drr_object.drr_allocation_txg); */
1011 1030 DO32(drr_object.drr_type);
1012 1031 DO32(drr_object.drr_bonustype);
1013 1032 DO32(drr_object.drr_blksz);
1014 1033 DO32(drr_object.drr_bonuslen);
1015 1034 DO64(drr_object.drr_toguid);
1016 1035 break;
1017 1036 case DRR_FREEOBJECTS:
1018 1037 DO64(drr_freeobjects.drr_firstobj);
1019 1038 DO64(drr_freeobjects.drr_numobjs);
1020 1039 DO64(drr_freeobjects.drr_toguid);
1021 1040 break;
1022 1041 case DRR_WRITE:
1023 1042 DO64(drr_write.drr_object);
1024 1043 DO32(drr_write.drr_type);
1025 1044 DO64(drr_write.drr_offset);
1026 1045 DO64(drr_write.drr_length);
1027 1046 DO64(drr_write.drr_toguid);
1028 1047 DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
1029 1048 DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
1030 1049 DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
1031 1050 DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
1032 1051 DO64(drr_write.drr_key.ddk_prop);
1033 1052 break;
1034 1053 case DRR_WRITE_BYREF:
1035 1054 DO64(drr_write_byref.drr_object);
1036 1055 DO64(drr_write_byref.drr_offset);
1037 1056 DO64(drr_write_byref.drr_length);
1038 1057 DO64(drr_write_byref.drr_toguid);
1039 1058 DO64(drr_write_byref.drr_refguid);
1040 1059 DO64(drr_write_byref.drr_refobject);
1041 1060 DO64(drr_write_byref.drr_refoffset);
1042 1061 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
1043 1062 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
1044 1063 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
1045 1064 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
1046 1065 DO64(drr_write_byref.drr_key.ddk_prop);
1047 1066 break;
1048 1067 case DRR_FREE:
1049 1068 DO64(drr_free.drr_object);
1050 1069 DO64(drr_free.drr_offset);
1051 1070 DO64(drr_free.drr_length);
1052 1071 DO64(drr_free.drr_toguid);
1053 1072 break;
1054 1073 case DRR_SPILL:
1055 1074 DO64(drr_spill.drr_object);
1056 1075 DO64(drr_spill.drr_length);
1057 1076 DO64(drr_spill.drr_toguid);
1058 1077 break;
1059 1078 case DRR_END:
1060 1079 DO64(drr_end.drr_checksum.zc_word[0]);
1061 1080 DO64(drr_end.drr_checksum.zc_word[1]);
1062 1081 DO64(drr_end.drr_checksum.zc_word[2]);
1063 1082 DO64(drr_end.drr_checksum.zc_word[3]);
1064 1083 DO64(drr_end.drr_toguid);
1065 1084 break;
1066 1085 }
1067 1086 #undef DO64
1068 1087 #undef DO32
1069 1088 }
1070 1089
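backup_byteswap exists because the stream may come from an opposite-endian host; dmu_recv_begin detects this by whether drr_magic matches DMU_BACKUP_MAGIC directly or only after BSWAP_64. A stand-alone demonstration of the detection (gcc/clang builtins stand in for sys/byteorder.h, and the magic constant is quoted from memory of zfs_ioctl.h, so treat it as an assumption):

    #include <stdio.h>
    #include <stdint.h>

    #define BSWAP_64(x)             __builtin_bswap64(x)
    #define DMU_BACKUP_MAGIC        0x2F5bacbacULL  /* see zfs_ioctl.h */

    int
    main(void)
    {
            uint64_t wire = BSWAP_64(DMU_BACKUP_MAGIC); /* other-endian peer */

            if (wire == DMU_BACKUP_MAGIC)
                    printf("native-endian stream\n");
            else if (BSWAP_64(wire) == DMU_BACKUP_MAGIC)
                    printf("byteswapped stream: swap every field, "
                        "as backup_byteswap does\n");
            else
                    printf("not a replay stream\n");
            return (0);
    }
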
1071 1090 static int
1072 1091 restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
1073 1092 {
1074 1093 int err;
1075 1094 dmu_tx_t *tx;
1076 1095 void *data = NULL;
1077 1096
1078 1097 if (drro->drr_type == DMU_OT_NONE ||
1079 1098 !DMU_OT_IS_VALID(drro->drr_type) ||
1080 1099 !DMU_OT_IS_VALID(drro->drr_bonustype) ||
1081 1100 drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
1082 1101 drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
1083 1102 P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
1084 1103 drro->drr_blksz < SPA_MINBLOCKSIZE ||
1085 1104 drro->drr_blksz > SPA_MAXBLOCKSIZE ||
1086 1105 drro->drr_bonuslen > DN_MAX_BONUSLEN) {
1087 1106 return (EINVAL);
1088 1107 }
1089 1108
1090 1109 err = dmu_object_info(os, drro->drr_object, NULL);
1091 1110
1092 1111 if (err != 0 && err != ENOENT)
1093 1112 return (EINVAL);
1094 1113
1095 1114 if (drro->drr_bonuslen) {
1096 1115 data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
1097 1116 if (ra->err)
1098 1117 return (ra->err);
1099 1118 }
1100 1119
1101 1120 if (err == ENOENT) {
1102 1121 /* currently free, want to be allocated */
1103 1122 tx = dmu_tx_create(os);
1104 1123 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
1105 1124 err = dmu_tx_assign(tx, TXG_WAIT);
1106 1125 if (err) {
1107 1126 dmu_tx_abort(tx);
1108 1127 return (err);
1109 1128 }
1110 1129 err = dmu_object_claim(os, drro->drr_object,
1111 1130 drro->drr_type, drro->drr_blksz,
1112 1131 drro->drr_bonustype, drro->drr_bonuslen, tx);
1113 1132 dmu_tx_commit(tx);
1114 1133 } else {
1115 1134 /* currently allocated, want to be allocated */
1116 1135 err = dmu_object_reclaim(os, drro->drr_object,
1117 1136 drro->drr_type, drro->drr_blksz,
1118 1137 drro->drr_bonustype, drro->drr_bonuslen);
1119 1138 }
1120 1139 if (err) {
1121 1140 return (EINVAL);
1122 1141 }
1123 1142
1124 1143 tx = dmu_tx_create(os);
1125 1144 dmu_tx_hold_bonus(tx, drro->drr_object);
1126 1145 err = dmu_tx_assign(tx, TXG_WAIT);
1127 1146 if (err) {
1128 1147 dmu_tx_abort(tx);
1129 1148 return (err);
1130 1149 }
1131 1150
1132 1151 dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
1133 1152 tx);
1134 1153 dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
1135 1154
1136 1155 if (data != NULL) {
1137 1156 dmu_buf_t *db;
1138 1157
1139 1158 VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
1140 1159 dmu_buf_will_dirty(db, tx);
1141 1160
1142 1161 ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
1143 1162 bcopy(data, db->db_data, drro->drr_bonuslen);
1144 1163 if (ra->byteswap) {
1145 1164 dmu_object_byteswap_t byteswap =
1146 1165 DMU_OT_BYTESWAP(drro->drr_bonustype);
1147 1166 dmu_ot_byteswap[byteswap].ob_func(db->db_data,
1148 1167 drro->drr_bonuslen);
1149 1168 }
1150 1169 dmu_buf_rele(db, FTAG);
1151 1170 }
1152 1171 dmu_tx_commit(tx);
1153 1172 return (0);
1154 1173 }
1155 1174
1156 1175 /* ARGSUSED */
1157 1176 static int
1158 1177 restore_freeobjects(struct restorearg *ra, objset_t *os,
1159 1178 struct drr_freeobjects *drrfo)
1160 1179 {
1161 1180 uint64_t obj;
1162 1181
1163 1182 if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
1164 1183 return (EINVAL);
1165 1184
1166 1185 for (obj = drrfo->drr_firstobj;
1167 1186 obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
1168 1187 (void) dmu_object_next(os, &obj, FALSE, 0)) {
1169 1188 int err;
1170 1189
1171 1190 if (dmu_object_info(os, obj, NULL) != 0)
1172 1191 continue;
1173 1192
1174 1193 err = dmu_free_object(os, obj);
1175 1194 if (err)
1176 1195 return (err);
1177 1196 }
1178 1197 return (0);
1179 1198 }
1180 1199
1181 1200 static int
1182 1201 restore_write(struct restorearg *ra, objset_t *os,
1183 1202 struct drr_write *drrw)
1184 1203 {
1185 1204 dmu_tx_t *tx;
1186 1205 void *data;
1187 1206 int err;
1188 1207
1189 1208 if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
1190 1209 !DMU_OT_IS_VALID(drrw->drr_type))
1191 1210 return (EINVAL);
1192 1211
1193 1212 data = restore_read(ra, drrw->drr_length);
1194 1213 if (data == NULL)
1195 1214 return (ra->err);
1196 1215
1197 1216 if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
1198 1217 return (EINVAL);
1199 1218
1200 1219 tx = dmu_tx_create(os);
1201 1220
1202 1221 dmu_tx_hold_write(tx, drrw->drr_object,
1203 1222 drrw->drr_offset, drrw->drr_length);
1204 1223 err = dmu_tx_assign(tx, TXG_WAIT);
1205 1224 if (err) {
1206 1225 dmu_tx_abort(tx);
1207 1226 return (err);
1208 1227 }
1209 1228 if (ra->byteswap) {
1210 1229 dmu_object_byteswap_t byteswap =
1211 1230 DMU_OT_BYTESWAP(drrw->drr_type);
1212 1231 dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
1213 1232 }
1214 1233 dmu_write(os, drrw->drr_object,
1215 1234 drrw->drr_offset, drrw->drr_length, data, tx);
1216 1235 dmu_tx_commit(tx);
1217 1236 return (0);
1218 1237 }
1219 1238
1220 1239 /*
1221 1240 * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed
1222 1241 * streams to refer to a copy of the data that is already on the
1223 1242 * system because it came in earlier in the stream. This function
1224 1243 * finds the earlier copy of the data, and uses that copy instead of
1225 1244 * data from the stream to fulfill this write.
1226 1245 */
1227 1246 static int
1228 1247 restore_write_byref(struct restorearg *ra, objset_t *os,
1229 1248 struct drr_write_byref *drrwbr)
1230 1249 {
1231 1250 dmu_tx_t *tx;
1232 1251 int err;
1233 1252 guid_map_entry_t gmesrch;
1234 1253 guid_map_entry_t *gmep;
1235 1254 avl_index_t where;
1236 1255 objset_t *ref_os = NULL;
1237 1256 dmu_buf_t *dbp;
1238 1257
1239 1258 if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
1240 1259 return (EINVAL);
1241 1260
1242 1261 /*
1243 1262 * If the GUID of the referenced dataset is different from the
1244 1263 * GUID of the target dataset, find the referenced dataset.
1245 1264 */
1246 1265 if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
1247 1266 gmesrch.guid = drrwbr->drr_refguid;
1248 1267 if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
1249 1268 &where)) == NULL) {
1250 1269 return (EINVAL);
1251 1270 }
1252 1271 if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
1253 1272 return (EINVAL);
1254 1273 } else {
1255 1274 ref_os = os;
1256 1275 }
1257 1276
1258 1277 if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
1259 1278 drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH))
1260 1279 return (err);
1261 1280
1262 1281 tx = dmu_tx_create(os);
1263 1282
1264 1283 dmu_tx_hold_write(tx, drrwbr->drr_object,
1265 1284 drrwbr->drr_offset, drrwbr->drr_length);
1266 1285 err = dmu_tx_assign(tx, TXG_WAIT);
1267 1286 if (err) {
1268 1287 dmu_tx_abort(tx);
1269 1288 return (err);
1270 1289 }
1271 1290 dmu_write(os, drrwbr->drr_object,
1272 1291 drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
1273 1292 dmu_buf_rele(dbp, FTAG);
1274 1293 dmu_tx_commit(tx);
1275 1294 return (0);
1276 1295 }
1277 1296
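For DRR_WRITE_BYREF the receiver must already hold the dataset that contains the referenced copy; the guid_to_ds_map built up by add_ds_to_guidmap supplies that guid-to-dataset translation. A toy version of the lookup, with a flat array standing in for the AVL tree (all names invented):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * A WRITE_BYREF record carries drr_refguid; the receiver resolves it
     * to a dataset it has already received and copies the block from there.
     */
    struct guidmap { uint64_t guid; const char *dataset; };

    static const char *
    lookup(const struct guidmap *map, int n, uint64_t refguid)
    {
            for (int i = 0; i < n; i++)
                    if (map[i].guid == refguid)
                            return (map[i].dataset);
            return (NULL);  /* unknown refguid: the kernel returns EINVAL */
    }

    int
    main(void)
    {
            struct guidmap map[] = {
                    { 0xdeadbeefULL, "pool/fs@snap1" },
                    { 0xfeedfaceULL, "pool/fs@snap2" },
            };

            printf("refguid 0xfeedface -> %s\n",
                lookup(map, 2, 0xfeedfaceULL));
            return (0);
    }
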
1278 1297 static int
1279 1298 restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
1280 1299 {
1281 1300 dmu_tx_t *tx;
1282 1301 void *data;
1283 1302 dmu_buf_t *db, *db_spill;
1284 1303 int err;
1285 1304
1286 1305 if (drrs->drr_length < SPA_MINBLOCKSIZE ||
1287 1306 drrs->drr_length > SPA_MAXBLOCKSIZE)
1288 1307 return (EINVAL);
1289 1308
1290 1309 data = restore_read(ra, drrs->drr_length);
1291 1310 if (data == NULL)
1292 1311 return (ra->err);
1293 1312
1294 1313 if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
1295 1314 return (EINVAL);
1296 1315
1297 1316 VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
1298 1317 if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
1299 1318 dmu_buf_rele(db, FTAG);
1300 1319 return (err);
1301 1320 }
1302 1321
1303 1322 tx = dmu_tx_create(os);
1304 1323
1305 1324 dmu_tx_hold_spill(tx, db->db_object);
1306 1325
1307 1326 err = dmu_tx_assign(tx, TXG_WAIT);
1308 1327 if (err) {
1309 1328 dmu_buf_rele(db, FTAG);
1310 1329 dmu_buf_rele(db_spill, FTAG);
1311 1330 dmu_tx_abort(tx);
1312 1331 return (err);
1313 1332 }
1314 1333 dmu_buf_will_dirty(db_spill, tx);
1315 1334
1316 1335 if (db_spill->db_size < drrs->drr_length)
1317 1336 VERIFY(0 == dbuf_spill_set_blksz(db_spill,
1318 1337 drrs->drr_length, tx));
1319 1338 bcopy(data, db_spill->db_data, drrs->drr_length);
1320 1339
1321 1340 dmu_buf_rele(db, FTAG);
1322 1341 dmu_buf_rele(db_spill, FTAG);
1323 1342
1324 1343 dmu_tx_commit(tx);
1325 1344 return (0);
1326 1345 }
1327 1346
1328 1347 /* ARGSUSED */
1329 1348 static int
1330 1349 restore_free(struct restorearg *ra, objset_t *os,
1331 1350 struct drr_free *drrf)
1332 1351 {
1333 1352 int err;
1334 1353
1335 1354 if (drrf->drr_length != -1ULL &&
1336 1355 drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
1337 1356 return (EINVAL);
1338 1357
1339 1358 if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
1340 1359 return (EINVAL);
1341 1360
1342 1361 err = dmu_free_long_range(os, drrf->drr_object,
1343 1362 drrf->drr_offset, drrf->drr_length);
1344 1363 return (err);
1345 1364 }
1346 1365
1347 1366 /*
1348 1367 * NB: callers *must* call dmu_recv_end() if this succeeds.
1349 1368 */
1350 1369 int
1351 1370 dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
1352 1371 int cleanup_fd, uint64_t *action_handlep)
1353 1372 {
1354 1373 struct restorearg ra = { 0 };
1355 1374 dmu_replay_record_t *drr;
1356 1375 objset_t *os;
1357 1376 zio_cksum_t pcksum;
1358 1377 int featureflags;
1359 1378
1360 1379 if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
1361 1380 ra.byteswap = TRUE;
1362 1381
1363 1382 {
1364 1383 /* compute checksum of drr_begin record */
1365 1384 dmu_replay_record_t *drr;
1366 1385 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
1367 1386
1368 1387 drr->drr_type = DRR_BEGIN;
1369 1388 drr->drr_u.drr_begin = *drc->drc_drrb;
1370 1389 if (ra.byteswap) {
1371 1390 fletcher_4_incremental_byteswap(drr,
1372 1391 sizeof (dmu_replay_record_t), &ra.cksum);
1373 1392 } else {
1374 1393 fletcher_4_incremental_native(drr,
1375 1394 sizeof (dmu_replay_record_t), &ra.cksum);
1376 1395 }
1377 1396 kmem_free(drr, sizeof (dmu_replay_record_t));
1378 1397 }
1379 1398
1380 1399 if (ra.byteswap) {
1381 1400 struct drr_begin *drrb = drc->drc_drrb;
1382 1401 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
1383 1402 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
1384 1403 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
1385 1404 drrb->drr_type = BSWAP_32(drrb->drr_type);
1386 1405 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
1387 1406 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
1388 1407 }
1389 1408
1390 1409 ra.vp = vp;
1391 1410 ra.voff = *voffp;
1392 1411 ra.bufsize = 1<<20;
1393 1412 ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
1394 1413
1395 1414 /* these were verified in dmu_recv_begin */
1396 1415 ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) ==
1397 1416 DMU_SUBSTREAM);
1398 1417 ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES);
1399 1418
1400 1419 /*
1401 1420 * Open the objset we are modifying.
1402 1421 */
1403 1422 VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0);
1404 1423
1405 1424 ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
1406 1425
1407 1426 featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
1408 1427
1409 1428 /* if this stream is dedup'ed, set up the avl tree for guid mapping */
1410 1429 if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
1411 1430 minor_t minor;
1412 1431
1413 1432 if (cleanup_fd == -1) {
1414 1433 ra.err = EBADF;
1415 1434 goto out;
1416 1435 }
1417 1436 ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
1418 1437 if (ra.err) {
1419 1438 cleanup_fd = -1;
1420 1439 goto out;
1421 1440 }
1422 1441
1423 1442 if (*action_handlep == 0) {
1424 1443 ra.guid_to_ds_map =
1425 1444 kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
1426 1445 avl_create(ra.guid_to_ds_map, guid_compare,
1427 1446 sizeof (guid_map_entry_t),
1428 1447 offsetof(guid_map_entry_t, avlnode));
1429 1448 ra.err = zfs_onexit_add_cb(minor,
1430 1449 free_guid_map_onexit, ra.guid_to_ds_map,
1431 1450 action_handlep);
1432 1451 if (ra.err)
1433 1452 goto out;
1434 1453 } else {
1435 1454 ra.err = zfs_onexit_cb_data(minor, *action_handlep,
1436 1455 (void **)&ra.guid_to_ds_map);
1437 1456 if (ra.err)
1438 1457 goto out;
1439 1458 }
1440 1459
1441 1460 drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
1442 1461 }
1443 1462
1444 1463 /*
1445 1464 * Read records and process them.
1446 1465 */
1447 1466 pcksum = ra.cksum;
1448 1467 while (ra.err == 0 &&
1449 1468 NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
1450 1469 if (issig(JUSTLOOKING) && issig(FORREAL)) {
1451 1470 ra.err = EINTR;
1452 1471 goto out;
1453 1472 }
1454 1473
1455 1474 if (ra.byteswap)
1456 1475 backup_byteswap(drr);
1457 1476
1458 1477 switch (drr->drr_type) {
1459 1478 case DRR_OBJECT:
1460 1479 {
1461 1480 /*
1462 1481 * We need to make a copy of the record header,
1463 1482 * because restore_{object,write} may need to
1464 1483 * restore_read(), which will invalidate drr.
1465 1484 */
1466 1485 struct drr_object drro = drr->drr_u.drr_object;
1467 1486 ra.err = restore_object(&ra, os, &drro);
1468 1487 break;
1469 1488 }
1470 1489 case DRR_FREEOBJECTS:
1471 1490 {
1472 1491 struct drr_freeobjects drrfo =
1473 1492 drr->drr_u.drr_freeobjects;
1474 1493 ra.err = restore_freeobjects(&ra, os, &drrfo);
1475 1494 break;
1476 1495 }
1477 1496 case DRR_WRITE:
1478 1497 {
1479 1498 struct drr_write drrw = drr->drr_u.drr_write;
1480 1499 ra.err = restore_write(&ra, os, &drrw);
1481 1500 break;
1482 1501 }
1483 1502 case DRR_WRITE_BYREF:
1484 1503 {
1485 1504 struct drr_write_byref drrwbr =
1486 1505 drr->drr_u.drr_write_byref;
1487 1506 ra.err = restore_write_byref(&ra, os, &drrwbr);
1488 1507 break;
1489 1508 }
1490 1509 case DRR_FREE:
1491 1510 {
1492 1511 struct drr_free drrf = drr->drr_u.drr_free;
1493 1512 ra.err = restore_free(&ra, os, &drrf);
1494 1513 break;
1495 1514 }
1496 1515 case DRR_END:
1497 1516 {
1498 1517 struct drr_end drre = drr->drr_u.drr_end;
1499 1518 /*
1500 1519 * We compare against the *previous* checksum
1501 1520 * value, because the stored checksum is of
1502 1521 * everything before the DRR_END record.
1503 1522 */
1504 1523 if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
1505 1524 ra.err = ECKSUM;
1506 1525 goto out;
1507 1526 }
1508 1527 case DRR_SPILL:
1509 1528 {
1510 1529 struct drr_spill drrs = drr->drr_u.drr_spill;
1511 1530 ra.err = restore_spill(&ra, os, &drrs);
1512 1531 break;
1513 1532 }
1514 1533 default:
1515 1534 ra.err = EINVAL;
1516 1535 goto out;
1517 1536 }
1518 1537 pcksum = ra.cksum;
1519 1538 }
1520 1539 ASSERT(ra.err != 0);
1521 1540
1522 1541 out:
1523 1542 if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
1524 1543 zfs_onexit_fd_rele(cleanup_fd);
1525 1544
1526 1545 if (ra.err != 0) {
1527 1546 /*
1528 1547 * destroy what we created, so we don't leave it in the
1529 1548 * inconsistent restoring state.
1530 1549 */
1531 1550 txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
1532 1551
1533 1552 (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
1534 1553 B_FALSE);
1535 1554 if (drc->drc_real_ds != drc->drc_logical_ds) {
1536 1555 mutex_exit(&drc->drc_logical_ds->ds_recvlock);
1537 1556 dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
1538 1557 }
1539 1558 }
1540 1559
1541 1560 kmem_free(ra.buf, ra.bufsize);
1542 1561 *voffp = ra.voff;
1543 1562 return (ra.err);
1544 1563 }
1545 1564
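The stream checksum is cumulative, and the DRR_END record carries the checksum of everything before it; that is why the loop above keeps the previous value (pcksum) around and verifies DRR_END against it rather than against ra.cksum. A toy version of the same bookkeeping, with a trivial additive checksum standing in for fletcher_4:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    static uint64_t
    cksum_update(uint64_t c, const void *buf, size_t len)
    {
            const uint8_t *p = buf;

            while (len--)
                    c += *p++;      /* toy stand-in for fletcher_4 */
            return (c);
    }

    int
    main(void)
    {
            uint64_t cksum = 0, pcksum = 0;
            const char *records[] = { "OBJECT", "WRITE", "END" };

            for (int i = 0; i < 3; i++) {
                    pcksum = cksum; /* value before this record */
                    cksum = cksum_update(cksum, records[i],
                        strlen(records[i]));
                    if (strcmp(records[i], "END") == 0) {
                            /* END carries the checksum of all prior bytes */
                            printf("verify against pcksum = %llu\n",
                                (unsigned long long)pcksum);
                    }
            }
            return (0);
    }
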
1546 1565 struct recvendsyncarg {
1547 1566 char *tosnap;
1548 1567 uint64_t creation_time;
1549 1568 uint64_t toguid;
1550 1569 };
1551 1570
1552 1571 static int
1553 1572 recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
1554 1573 {
1555 1574 dsl_dataset_t *ds = arg1;
1556 1575 struct recvendsyncarg *resa = arg2;
1557 1576
1558 1577 return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx));
1559 1578 }
1560 1579
1561 1580 static void
1562 1581 recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1563 1582 {
1564 1583 dsl_dataset_t *ds = arg1;
1565 1584 struct recvendsyncarg *resa = arg2;
1566 1585
1567 1586 dsl_dataset_snapshot_sync(ds, resa->tosnap, tx);
1568 1587
1569 1588 /* set snapshot's creation time and guid */
1570 1589 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1571 1590 ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time;
1572 1591 ds->ds_prev->ds_phys->ds_guid = resa->toguid;
1573 1592 ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
1574 1593
1575 1594 dmu_buf_will_dirty(ds->ds_dbuf, tx);
1576 1595 ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
1577 1596 spa_history_log_internal_ds(ds, "finished receiving", tx, "");
1578 1597 }
1579 1598
1580 1599 static int
1581 1600 add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds)
1582 1601 {
1583 1602 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1584 1603 uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj;
1585 1604 dsl_dataset_t *snapds;
1586 1605 guid_map_entry_t *gmep;
1587 1606 int err;
1588 1607
1589 1608 ASSERT(guid_map != NULL);
1590 1609
1591 1610 rw_enter(&dp->dp_config_rwlock, RW_READER);
1592 1611 err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds);
1593 1612 if (err == 0) {
1594 1613 gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
1595 1614 gmep->guid = snapds->ds_phys->ds_guid;
1596 1615 gmep->gme_ds = snapds;
1597 1616 avl_add(guid_map, gmep);
1598 1617 }
1599 1618
1600 1619 rw_exit(&dp->dp_config_rwlock);
1601 1620 return (err);
1602 1621 }
1603 1622
1604 1623 static int
1605 1624 dmu_recv_existing_end(dmu_recv_cookie_t *drc)
1606 1625 {
1607 1626 struct recvendsyncarg resa;
1608 1627 dsl_dataset_t *ds = drc->drc_logical_ds;
1609 1628 int err, myerr;
1610 1629
1611 1630 /*
1612 1631 * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
1613 1632 * expects it to have a ds_user_ptr (and zil), but clone_swap()
1614 1633 * can close it.
1615 1634 */
1616 1635 txg_wait_synced(ds->ds_dir->dd_pool, 0);
1617 1636
1618 1637 if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
1619 1638 err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
1620 1639 drc->drc_force);
1621 1640 if (err)
1622 1641 goto out;
1623 1642 } else {
1624 1643 mutex_exit(&ds->ds_recvlock);
1625 1644 dsl_dataset_rele(ds, dmu_recv_tag);
1626 1645 (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
1627 1646 B_FALSE);
1628 1647 return (EBUSY);
1629 1648 }
1630 1649
1631 1650 resa.creation_time = drc->drc_drrb->drr_creation_time;
1632 1651 resa.toguid = drc->drc_drrb->drr_toguid;
1633 1652 resa.tosnap = drc->drc_tosnap;
1634 1653
1635 1654 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1636 1655 recv_end_check, recv_end_sync, ds, &resa, 3);
1637 1656 if (err) {
1638 1657 /* swap back */
1639 1658 (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE);
1640 1659 }
1641 1660
1642 1661 out:
1643 1662 mutex_exit(&ds->ds_recvlock);
1644 1663 if (err == 0 && drc->drc_guid_to_ds_map != NULL)
1645 1664 (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
1646 1665 dsl_dataset_disown(ds, dmu_recv_tag);
1647 1666 myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
1648 1667 ASSERT3U(myerr, ==, 0);
1649 1668 return (err);
1650 1669 }
1651 1670
1652 1671 static int
1653 1672 dmu_recv_new_end(dmu_recv_cookie_t *drc)
1654 1673 {
1655 1674 struct recvendsyncarg resa;
1656 1675 dsl_dataset_t *ds = drc->drc_logical_ds;
1657 1676 int err;
1658 1677
1659 1678 /*
1660 1679 * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
1661 1680 * expects it to have a ds_user_ptr (and zil), but clone_swap()
1662 1681 * can close it.
1663 1682 */
1664 1683 txg_wait_synced(ds->ds_dir->dd_pool, 0);
1665 1684
1666 1685 resa.creation_time = drc->drc_drrb->drr_creation_time;
1667 1686 resa.toguid = drc->drc_drrb->drr_toguid;
1668 1687 resa.tosnap = drc->drc_tosnap;
1669 1688
1670 1689 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1671 1690 recv_end_check, recv_end_sync, ds, &resa, 3);
1672 1691 if (err) {
1673 1692 /* clean up the fs we just recv'd into */
1674 1693 (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE);
1675 1694 } else {
1676 1695 if (drc->drc_guid_to_ds_map != NULL)
1677 1696 (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
1678 1697 /* release the hold from dmu_recv_begin */
1679 1698 dsl_dataset_disown(ds, dmu_recv_tag);
1680 1699 }
1681 1700 return (err);
1682 1701 }
1683 1702
1684 1703 int
1685 1704 dmu_recv_end(dmu_recv_cookie_t *drc)
1686 1705 {
1687 1706 if (drc->drc_logical_ds != drc->drc_real_ds)
1688 1707 return (dmu_recv_existing_end(drc));
1689 1708 else
1690 1709 return (dmu_recv_new_end(drc));
1691 1710 }