3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 */
27
28 #include <sys/dmu.h>
29 #include <sys/dmu_impl.h>
30 #include <sys/dmu_tx.h>
31 #include <sys/dbuf.h>
32 #include <sys/dnode.h>
33 #include <sys/zfs_context.h>
34 #include <sys/dmu_objset.h>
35 #include <sys/dmu_traverse.h>
36 #include <sys/dsl_dataset.h>
37 #include <sys/dsl_dir.h>
38 #include <sys/dsl_prop.h>
39 #include <sys/dsl_pool.h>
40 #include <sys/dsl_synctask.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/zap.h>
43 #include <sys/zio_checksum.h>
44 #include <sys/zfs_znode.h>
45 #include <zfs_fletcher.h>
46 #include <sys/avl.h>
47 #include <sys/ddt.h>
48 #include <sys/zfs_onexit.h>
49
50 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
/*
 * Defaults to B_FALSE.  When a level-0 data block cannot be read while
 * generating a send stream, the read failure is reported as EIO unless
 * this tunable is set, in which case a buffer filled with the
 * 0x2f5baddb10c pattern is sent in the block's place.
 */
51 int zfs_send_corrupt_data = B_FALSE;
52
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably the tag used by the receive path -- confirm.
 */
53 static char *dmu_recv_tag = "dmu_recv_tag";
54
55 static int
56 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
57 {
58 dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
59 ssize_t resid; /* have to get resid to get detailed errno */
60 ASSERT3U(len % 8, ==, 0);
61
62 fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
63 dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
64 (caddr_t)buf, len,
65 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
66
67 mutex_enter(&ds->ds_sendstream_lock);
68 *dsp->dsa_off += len;
69 mutex_exit(&ds->ds_sendstream_lock);
70
71 return (dsp->dsa_err);
72 }
73
74 static int
75 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
76 uint64_t length)
77 {
78 struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
79
80 if (length != -1ULL && offset + length < offset)
81 length = -1ULL;
82
83 /*
84 * If there is a pending op, but it's not PENDING_FREE, push it out,
85 * since free block aggregation can only be done for blocks of the
86 * same type (i.e., DRR_FREE records can only be aggregated with
341 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
342 err = dump_dnode(dsp, dnobj, blk+i);
343 if (err)
344 break;
345 }
346 (void) arc_buf_remove_ref(abuf, &abuf);
347 } else if (type == DMU_OT_SA) {
348 uint32_t aflags = ARC_WAIT;
349 arc_buf_t *abuf;
350 int blksz = BP_GET_LSIZE(bp);
351
352 if (arc_read_nolock(NULL, spa, bp,
353 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
354 ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
355 return (EIO);
356
357 err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
358 (void) arc_buf_remove_ref(abuf, &abuf);
359 } else { /* it's a level-0 block of a regular object */
360 uint32_t aflags = ARC_WAIT;
361 arc_buf_t *abuf;
362 int blksz = BP_GET_LSIZE(bp);
363
364 if (dsl_read(NULL, spa, bp, pbuf,
365 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
366 ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
367 if (zfs_send_corrupt_data) {
368 /* Send a block filled with 0x"zfs badd bloc" */
369 abuf = arc_buf_alloc(spa, blksz, &abuf,
370 ARC_BUFC_DATA);
371 uint64_t *ptr;
372 for (ptr = abuf->b_data;
373 (char *)ptr < (char *)abuf->b_data + blksz;
374 ptr++)
375 *ptr = 0x2f5baddb10c;
376 } else {
377 return (EIO);
378 }
379 }
380
381 err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
382 blksz, bp, abuf->b_data);
383 (void) arc_buf_remove_ref(abuf, &abuf);
384 }
385
386 ASSERT(err == 0 || err == EINTR);
387 return (err);
388 }
389
390 /*
391 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
392 * For example, they could both be snapshots of the same filesystem, and
393 * 'earlier' is before 'later'. Or 'earlier' could be the origin of
394 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
395 * filesystem. Or 'earlier' could be the origin's origin.
396 */
397 static boolean_t
398 is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
399 {
400 dsl_pool_t *dp = later->ds_dir->dd_pool;
401 int error;
402 boolean_t ret;
403 dsl_dataset_t *origin;
404
/*
 * NOTE(review): the embedded line numbering skips 405-408 here, so
 * statements may be elided from this view -- confirm against the
 * full file before relying on the checks below being complete.
 */
/* Both datasets belong to the same dsl_dir. */
409 if (later->ds_dir == earlier->ds_dir)
410 return (B_TRUE);
/* dsl_dir_is_clone() says no: there is no origin to follow. */
411 if (!dsl_dir_is_clone(later->ds_dir))
412 return (B_FALSE);
413
/* The origin is examined with dp_config_rwlock held as reader. */
414 rw_enter(&dp->dp_config_rwlock, RW_READER);
/* 'earlier' is itself the origin object of 'later's dsl_dir. */
415 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) {
416 rw_exit(&dp->dp_config_rwlock);
417 return (B_TRUE);
418 }
/* Hold the origin dataset (tag FTAG) before the lock is released. */
419 error = dsl_dataset_hold_obj(dp,
420 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
421 rw_exit(&dp->dp_config_rwlock);
/* A failed hold is reported as "not before". */
422 if (error != 0)
423 return (B_FALSE);
/* Repeat the test one step up the chain, then drop the hold. */
424 ret = is_before(origin, earlier);
425 dsl_dataset_rele(origin, FTAG);
426 return (ret);
427 }
428
429 int
430 dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
431 offset_t *off)
432 {
433 dsl_dataset_t *ds = tosnap->os_dsl_dataset;
434 dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
435 dmu_replay_record_t *drr;
436 dmu_sendarg_t *dsp;
437 int err;
438 uint64_t fromtxg = 0;
439
440 /* tosnap must be a snapshot */
441 if (ds->ds_phys->ds_next_snap_obj == 0)
442 return (EINVAL);
443
444 /*
445 * fromsnap must be an earlier snapshot from the same fs as tosnap,
446 * or the origin's fs.
447 */
448 if (fromds != NULL && !is_before(ds, fromds))
449 return (EXDEV);
450
451 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
479 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
480
481 if (fromds)
482 drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
483 dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
484
485 if (fromds)
486 fromtxg = fromds->ds_phys->ds_creation_txg;
487
488 dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
489
490 dsp->dsa_drr = drr;
491 dsp->dsa_vp = vp;
492 dsp->dsa_outfd = outfd;
493 dsp->dsa_proc = curproc;
494 dsp->dsa_os = tosnap;
495 dsp->dsa_off = off;
496 dsp->dsa_toguid = ds->ds_phys->ds_guid;
497 ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
498 dsp->dsa_pending_op = PENDING_NONE;
499
500 mutex_enter(&ds->ds_sendstream_lock);
501 list_insert_head(&ds->ds_sendstreams, dsp);
502 mutex_exit(&ds->ds_sendstream_lock);
503
504 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
505 err = dsp->dsa_err;
506 goto out;
507 }
508
509 err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
510 backup_cb, dsp);
511
512 if (dsp->dsa_pending_op != PENDING_NONE)
513 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
514 err = EINTR;
515
516 if (err) {
517 if (err == EINTR && dsp->dsa_err)
518 err = dsp->dsa_err;
519 goto out;
520 }
521
522 bzero(drr, sizeof (dmu_replay_record_t));
523 drr->drr_type = DRR_END;
524 drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
525 drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
526
527 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
528 err = dsp->dsa_err;
529 goto out;
530 }
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 */
27
28 #include <sys/dmu.h>
29 #include <sys/dmu_impl.h>
30 #include <sys/dmu_tx.h>
31 #include <sys/dbuf.h>
32 #include <sys/dnode.h>
33 #include <sys/zfs_context.h>
34 #include <sys/dmu_objset.h>
35 #include <sys/dmu_traverse.h>
36 #include <sys/dsl_dataset.h>
37 #include <sys/dsl_dir.h>
38 #include <sys/dsl_prop.h>
39 #include <sys/dsl_pool.h>
40 #include <sys/dsl_synctask.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/zap.h>
43 #include <sys/zio_checksum.h>
44 #include <sys/zfs_znode.h>
45 #include <zfs_fletcher.h>
46 #include <sys/avl.h>
47 #include <sys/ddt.h>
48 #include <sys/zfs_onexit.h>
49
50 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
/*
 * Defaults to B_FALSE.  When a level-0 data block cannot be read while
 * generating a send stream, the read failure is reported as EIO unless
 * this tunable is set, in which case a buffer filled with the
 * 0x2f5baddb10c pattern is sent in the block's place.
 */
51 int zfs_send_corrupt_data = B_FALSE;
52
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably the tag used by the receive path -- confirm.
 */
53 static char *dmu_recv_tag = "dmu_recv_tag";
54
55 static int
56 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
57 {
58 dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
59 ssize_t resid; /* have to get resid to get detailed errno */
60 ASSERT3U(len % 8, ==, 0);
61
62 dsp->dsa_err = 0;
63 if (!dsp->sendsize) {
64 fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
65 dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
66 (caddr_t)buf, len,
67 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY,
68 CRED(), &resid);
69 }
70 mutex_enter(&ds->ds_sendstream_lock);
71 *dsp->dsa_off += len;
72 mutex_exit(&ds->ds_sendstream_lock);
73
74 return (dsp->dsa_err);
75 }
76
77 static int
78 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
79 uint64_t length)
80 {
81 struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
82
83 if (length != -1ULL && offset + length < offset)
84 length = -1ULL;
85
86 /*
87 * If there is a pending op, but it's not PENDING_FREE, push it out,
88 * since free block aggregation can only be done for blocks of the
89 * same type (i.e., DRR_FREE records can only be aggregated with
344 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
345 err = dump_dnode(dsp, dnobj, blk+i);
346 if (err)
347 break;
348 }
349 (void) arc_buf_remove_ref(abuf, &abuf);
350 } else if (type == DMU_OT_SA) {
351 uint32_t aflags = ARC_WAIT;
352 arc_buf_t *abuf;
353 int blksz = BP_GET_LSIZE(bp);
354
355 if (arc_read_nolock(NULL, spa, bp,
356 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
357 ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
358 return (EIO);
359
360 err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
361 (void) arc_buf_remove_ref(abuf, &abuf);
362 } else { /* it's a level-0 block of a regular object */
363 uint32_t aflags = ARC_WAIT;
364 arc_buf_t *abuf = NULL;
365 void *buf = NULL;
366 int blksz = BP_GET_LSIZE(bp);
367
368 if (!dsp->sendsize) {
369 if (dsl_read(NULL, spa, bp, pbuf,
370 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
371 ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
372 if (zfs_send_corrupt_data) {
373 /* Send a block filled with 0x"zfs badd bloc" */
374 abuf = arc_buf_alloc(spa, blksz, &abuf,
375 ARC_BUFC_DATA);
376 uint64_t *ptr;
377 for (ptr = abuf->b_data;
378 (char *)ptr <
379 (char *)abuf->b_data + blksz;
380 ptr++)
381 *ptr = 0x2f5baddb10c;
382 } else {
383 return (EIO);
384 }
385 }
386 buf = abuf->b_data;
387 }
388
389 err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
390 blksz, bp, buf);
391 if (!dsp->sendsize) {
392 (void) arc_buf_remove_ref(abuf, &abuf);
393 }
394 }
395
396 ASSERT(err == 0 || err == EINTR);
397 return (err);
398 }
399
400 /*
401 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
402 * For example, they could both be snapshots of the same filesystem, and
403 * 'earlier' is before 'later'. Or 'earlier' could be the origin of
404 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
405 * filesystem. Or 'earlier' could be the origin's origin.
406 */
407 static boolean_t
408 is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
409 {
410 dsl_pool_t *dp = later->ds_dir->dd_pool;
411 int error;
412 boolean_t ret;
413 dsl_dataset_t *origin;
414
/*
 * NOTE(review): the embedded line numbering skips 415-418 here, so
 * statements may be elided from this view -- confirm against the
 * full file before relying on the checks below being complete.
 */
/* Both datasets belong to the same dsl_dir. */
419 if (later->ds_dir == earlier->ds_dir)
420 return (B_TRUE);
/* dsl_dir_is_clone() says no: there is no origin to follow. */
421 if (!dsl_dir_is_clone(later->ds_dir))
422 return (B_FALSE);
423
/* The origin is examined with dp_config_rwlock held as reader. */
424 rw_enter(&dp->dp_config_rwlock, RW_READER);
/* 'earlier' is itself the origin object of 'later's dsl_dir. */
425 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) {
426 rw_exit(&dp->dp_config_rwlock);
427 return (B_TRUE);
428 }
/* Hold the origin dataset (tag FTAG) before the lock is released. */
429 error = dsl_dataset_hold_obj(dp,
430 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
431 rw_exit(&dp->dp_config_rwlock);
/* A failed hold is reported as "not before". */
432 if (error != 0)
433 return (B_FALSE);
/* Repeat the test one step up the chain, then drop the hold. */
434 ret = is_before(origin, earlier);
435 dsl_dataset_rele(origin, FTAG);
436 return (ret);
437 }
438
439
440 int
441 dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
442 offset_t *off, boolean_t sendsize)
443 {
444 dsl_dataset_t *ds = tosnap->os_dsl_dataset;
445 dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
446 dmu_replay_record_t *drr;
447 dmu_sendarg_t *dsp;
448 int err;
449 uint64_t fromtxg = 0;
450
451 /* tosnap must be a snapshot */
452 if (ds->ds_phys->ds_next_snap_obj == 0)
453 return (EINVAL);
454
455 /*
456 * fromsnap must be an earlier snapshot from the same fs as tosnap,
457 * or the origin's fs.
458 */
459 if (fromds != NULL && !is_before(ds, fromds))
460 return (EXDEV);
461
462 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
490 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
491
492 if (fromds)
493 drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
494 dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
495
496 if (fromds)
497 fromtxg = fromds->ds_phys->ds_creation_txg;
498
499 dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
500
501 dsp->dsa_drr = drr;
502 dsp->dsa_vp = vp;
503 dsp->dsa_outfd = outfd;
504 dsp->dsa_proc = curproc;
505 dsp->dsa_os = tosnap;
506 dsp->dsa_off = off;
507 dsp->dsa_toguid = ds->ds_phys->ds_guid;
508 ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
509 dsp->dsa_pending_op = PENDING_NONE;
510 dsp->sendsize = sendsize;
511
512 mutex_enter(&ds->ds_sendstream_lock);
513 list_insert_head(&ds->ds_sendstreams, dsp);
514 mutex_exit(&ds->ds_sendstream_lock);
515
516 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
517 err = dsp->dsa_err;
518 goto out;
519 }
520
521 if (dsp->sendsize) {
522 err = traverse_dataset(ds, fromtxg,
523 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
524 backup_cb, dsp);
525 } else {
526 err = traverse_dataset(ds,
527 fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
528 backup_cb, dsp);
529 }
530
531 if (dsp->dsa_pending_op != PENDING_NONE)
532 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
533 err = EINTR;
534
535 if (err) {
536 if (err == EINTR && dsp->dsa_err)
537 err = dsp->dsa_err;
538 goto out;
539 }
540
541 bzero(drr, sizeof (dmu_replay_record_t));
542 drr->drr_type = DRR_END;
543 drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
544 drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
545
546 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
547 err = dsp->dsa_err;
548 goto out;
549 }
|