3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/dmu.h>
  29 #include <sys/dmu_impl.h>
  30 #include <sys/dmu_tx.h>
  31 #include <sys/dbuf.h>
  32 #include <sys/dnode.h>
  33 #include <sys/zfs_context.h>
  34 #include <sys/dmu_objset.h>
  35 #include <sys/dmu_traverse.h>
  36 #include <sys/dsl_dataset.h>
  37 #include <sys/dsl_dir.h>
  38 #include <sys/dsl_prop.h>
  39 #include <sys/dsl_pool.h>
  40 #include <sys/dsl_synctask.h>
  41 #include <sys/zfs_ioctl.h>
  42 #include <sys/zap.h>
  43 #include <sys/zio_checksum.h>
  44 #include <sys/zfs_znode.h>
  45 #include <zfs_fletcher.h>
  46 #include <sys/avl.h>
  47 #include <sys/ddt.h>
  48 #include <sys/zfs_onexit.h>
  49 
  50 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
  51 int zfs_send_corrupt_data = B_FALSE;
  52 
  53 static char *dmu_recv_tag = "dmu_recv_tag";
  54 
  55 static int
  56 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
  57 {
  58         dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
  59         ssize_t resid; /* have to get resid to get detailed errno */
  60         ASSERT3U(len % 8, ==, 0);
  61 
  62         fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
  63         dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
  64             (caddr_t)buf, len,
  65             0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
  66 
  67         mutex_enter(&ds->ds_sendstream_lock);
  68         *dsp->dsa_off += len;
  69         mutex_exit(&ds->ds_sendstream_lock);
  70 
  71         return (dsp->dsa_err);
  72 }
  73 
  74 static int
  75 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
  76     uint64_t length)
  77 {
  78         struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
  79 
  80         if (length != -1ULL && offset + length < offset)
  81                 length = -1ULL;
  82 
  83         /*
  84          * If there is a pending op, but it's not PENDING_FREE, push it out,
  85          * since free block aggregation can only be done for blocks of the
  86          * same type (i.e., DRR_FREE records can only be aggregated with
 
 341                             (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
 342                         err = dump_dnode(dsp, dnobj, blk+i);
 343                         if (err)
 344                                 break;
 345                 }
 346                 (void) arc_buf_remove_ref(abuf, &abuf);
 347         } else if (type == DMU_OT_SA) {
 348                 uint32_t aflags = ARC_WAIT;
 349                 arc_buf_t *abuf;
 350                 int blksz = BP_GET_LSIZE(bp);
 351 
 352                 if (arc_read_nolock(NULL, spa, bp,
 353                     arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
 354                     ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
 355                         return (EIO);
 356 
 357                 err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
 358                 (void) arc_buf_remove_ref(abuf, &abuf);
 359         } else { /* it's a level-0 block of a regular object */
 360                 uint32_t aflags = ARC_WAIT;
 361                 arc_buf_t *abuf;
 362                 int blksz = BP_GET_LSIZE(bp);
 363 
 364                 if (dsl_read(NULL, spa, bp, pbuf,
 365                     arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
 366                     ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
 367                         if (zfs_send_corrupt_data) {
 368                                 /* Send a block filled with 0x"zfs badd bloc" */
 369                                 abuf = arc_buf_alloc(spa, blksz, &abuf,
 370                                     ARC_BUFC_DATA);
 371                                 uint64_t *ptr;
 372                                 for (ptr = abuf->b_data;
 373                                     (char *)ptr < (char *)abuf->b_data + blksz;
 374                                     ptr++)
 375                                         *ptr = 0x2f5baddb10c;
 376                         } else {
 377                                 return (EIO);
 378                         }
 379                 }
 380 
 381                 err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
 382                     blksz, bp, abuf->b_data);
 383                 (void) arc_buf_remove_ref(abuf, &abuf);
 384         }
 385 
 386         ASSERT(err == 0 || err == EINTR);
 387         return (err);
 388 }
 389 
 390 /*
 391  * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
 392  * For example, they could both be snapshots of the same filesystem, and
 393  * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
 394  * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
 395  * filesystem.  Or 'earlier' could be the origin's origin.
 396  */
 397 static boolean_t
 398 is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
 399 {
 400         dsl_pool_t *dp = later->ds_dir->dd_pool;
 401         int error;
 402         boolean_t ret;
 403         dsl_dataset_t *origin;
 404 
 
 409         if (later->ds_dir == earlier->ds_dir)
 410                 return (B_TRUE);
 411         if (!dsl_dir_is_clone(later->ds_dir))
 412                 return (B_FALSE);
 413 
 414         rw_enter(&dp->dp_config_rwlock, RW_READER);
 415         if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) {
 416                 rw_exit(&dp->dp_config_rwlock);
 417                 return (B_TRUE);
 418         }
 419         error = dsl_dataset_hold_obj(dp,
 420             later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
 421         rw_exit(&dp->dp_config_rwlock);
 422         if (error != 0)
 423                 return (B_FALSE);
 424         ret = is_before(origin, earlier);
 425         dsl_dataset_rele(origin, FTAG);
 426         return (ret);
 427 }
 428 
 429 int
 430 dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
 431     offset_t *off)
 432 {
 433         dsl_dataset_t *ds = tosnap->os_dsl_dataset;
 434         dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
 435         dmu_replay_record_t *drr;
 436         dmu_sendarg_t *dsp;
 437         int err;
 438         uint64_t fromtxg = 0;
 439 
 440         /* tosnap must be a snapshot */
 441         if (ds->ds_phys->ds_next_snap_obj == 0)
 442                 return (EINVAL);
 443 
 444         /*
 445          * fromsnap must be an earlier snapshot from the same fs as tosnap,
 446          * or the origin's fs.
 447          */
 448         if (fromds != NULL && !is_before(ds, fromds))
 449                 return (EXDEV);
 450 
 451         drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
 
 479                 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
 480 
 481         if (fromds)
 482                 drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
 483         dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
 484 
 485         if (fromds)
 486                 fromtxg = fromds->ds_phys->ds_creation_txg;
 487 
 488         dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
 489 
 490         dsp->dsa_drr = drr;
 491         dsp->dsa_vp = vp;
 492         dsp->dsa_outfd = outfd;
 493         dsp->dsa_proc = curproc;
 494         dsp->dsa_os = tosnap;
 495         dsp->dsa_off = off;
 496         dsp->dsa_toguid = ds->ds_phys->ds_guid;
 497         ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
 498         dsp->dsa_pending_op = PENDING_NONE;
 499 
 500         mutex_enter(&ds->ds_sendstream_lock);
 501         list_insert_head(&ds->ds_sendstreams, dsp);
 502         mutex_exit(&ds->ds_sendstream_lock);
 503 
 504         if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
 505                 err = dsp->dsa_err;
 506                 goto out;
 507         }
 508 
 509         err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
 510             backup_cb, dsp);
 511 
 512         if (dsp->dsa_pending_op != PENDING_NONE)
 513                 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
 514                         err = EINTR;
 515 
 516         if (err) {
 517                 if (err == EINTR && dsp->dsa_err)
 518                         err = dsp->dsa_err;
 519                 goto out;
 520         }
 521 
 522         bzero(drr, sizeof (dmu_replay_record_t));
 523         drr->drr_type = DRR_END;
 524         drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
 525         drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 526 
 527         if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
 528                 err = dsp->dsa_err;
 529                 goto out;
 530         }
 
 | 
 
 
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/dmu.h>
  29 #include <sys/dmu_impl.h>
  30 #include <sys/dmu_tx.h>
  31 #include <sys/dbuf.h>
  32 #include <sys/dnode.h>
  33 #include <sys/zfs_context.h>
  34 #include <sys/dmu_objset.h>
  35 #include <sys/dmu_traverse.h>
  36 #include <sys/dsl_dataset.h>
  37 #include <sys/dsl_dir.h>
  38 #include <sys/dsl_prop.h>
  39 #include <sys/dsl_pool.h>
  40 #include <sys/dsl_synctask.h>
  41 #include <sys/zfs_ioctl.h>
  42 #include <sys/zap.h>
  43 #include <sys/zio_checksum.h>
  44 #include <sys/zfs_znode.h>
  45 #include <zfs_fletcher.h>
  46 #include <sys/avl.h>
  47 #include <sys/ddt.h>
  48 #include <sys/zfs_onexit.h>
  49 
  50 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
  51 int zfs_send_corrupt_data = B_FALSE;
  52 
  53 static char *dmu_recv_tag = "dmu_recv_tag";
  54 
  55 static int
  56 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
  57 {
  58         dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
  59         ssize_t resid; /* have to get resid to get detailed errno */
  60         ASSERT3U(len % 8, ==, 0);
  61 
  62         dsp->dsa_err = 0;
  63         if (!dsp->sendsize) {
  64                 fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
  65                 dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
  66                     (caddr_t)buf, len,
  67                     0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY,
  68                     CRED(), &resid);
  69         }
  70         mutex_enter(&ds->ds_sendstream_lock);
  71         *dsp->dsa_off += len;
  72         mutex_exit(&ds->ds_sendstream_lock);
  73 
  74         return (dsp->dsa_err);
  75 }
  76 
  77 static int
  78 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
  79     uint64_t length)
  80 {
  81         struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
  82 
  83         if (length != -1ULL && offset + length < offset)
  84                 length = -1ULL;
  85 
  86         /*
  87          * If there is a pending op, but it's not PENDING_FREE, push it out,
  88          * since free block aggregation can only be done for blocks of the
  89          * same type (i.e., DRR_FREE records can only be aggregated with
 
 344                             (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
 345                         err = dump_dnode(dsp, dnobj, blk+i);
 346                         if (err)
 347                                 break;
 348                 }
 349                 (void) arc_buf_remove_ref(abuf, &abuf);
 350         } else if (type == DMU_OT_SA) {
 351                 uint32_t aflags = ARC_WAIT;
 352                 arc_buf_t *abuf;
 353                 int blksz = BP_GET_LSIZE(bp);
 354 
 355                 if (arc_read_nolock(NULL, spa, bp,
 356                     arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
 357                     ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
 358                         return (EIO);
 359 
 360                 err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
 361                 (void) arc_buf_remove_ref(abuf, &abuf);
 362         } else { /* it's a level-0 block of a regular object */
 363                 uint32_t aflags = ARC_WAIT;
 364                 arc_buf_t *abuf = NULL;
 365                 void *buf = NULL;
 366                 int blksz = BP_GET_LSIZE(bp);
 367 
 368                 if (!dsp->sendsize) {
 369                         if (dsl_read(NULL, spa, bp, pbuf,
 370                             arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
 371                             ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
 372                                 if (zfs_send_corrupt_data) {
 373                                 /* Send a block filled with 0x"zfs badd bloc" */
 374                                         abuf = arc_buf_alloc(spa, blksz, &abuf,
 375                                             ARC_BUFC_DATA);
 376                                         uint64_t *ptr;
 377                                         for (ptr = abuf->b_data;
 378                                             (char *)ptr <
 379                                             (char *)abuf->b_data + blksz;
 380                                             ptr++)
 381                                                 *ptr = 0x2f5baddb10c;
 382                                 } else {
 383                                         return (EIO);
 384                                 }
 385                         }
 386                         buf = abuf->b_data;
 387                 }
 388 
 389                 err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
 390                     blksz, bp, buf);
 391                 if (!dsp->sendsize) {
 392                         (void) arc_buf_remove_ref(abuf, &abuf);
 393                 }
 394         }
 395 
 396         ASSERT(err == 0 || err == EINTR);
 397         return (err);
 398 }
 399 
 400 /*
 401  * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
 402  * For example, they could both be snapshots of the same filesystem, and
 403  * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
 404  * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
 405  * filesystem.  Or 'earlier' could be the origin's origin.
 406  */
 407 static boolean_t
 408 is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
 409 {
 410         dsl_pool_t *dp = later->ds_dir->dd_pool;
 411         int error;
 412         boolean_t ret;
 413         dsl_dataset_t *origin;
 414 
 
 419         if (later->ds_dir == earlier->ds_dir)
 420                 return (B_TRUE);
 421         if (!dsl_dir_is_clone(later->ds_dir))
 422                 return (B_FALSE);
 423 
 424         rw_enter(&dp->dp_config_rwlock, RW_READER);
 425         if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) {
 426                 rw_exit(&dp->dp_config_rwlock);
 427                 return (B_TRUE);
 428         }
 429         error = dsl_dataset_hold_obj(dp,
 430             later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
 431         rw_exit(&dp->dp_config_rwlock);
 432         if (error != 0)
 433                 return (B_FALSE);
 434         ret = is_before(origin, earlier);
 435         dsl_dataset_rele(origin, FTAG);
 436         return (ret);
 437 }
 438 
 439 
 440 int
 441 dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
 442     offset_t *off, boolean_t sendsize)
 443 {
 444         dsl_dataset_t *ds = tosnap->os_dsl_dataset;
 445         dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
 446         dmu_replay_record_t *drr;
 447         dmu_sendarg_t *dsp;
 448         int err;
 449         uint64_t fromtxg = 0;
 450 
 451         /* tosnap must be a snapshot */
 452         if (ds->ds_phys->ds_next_snap_obj == 0)
 453                 return (EINVAL);
 454 
 455         /*
 456          * fromsnap must be an earlier snapshot from the same fs as tosnap,
 457          * or the origin's fs.
 458          */
 459         if (fromds != NULL && !is_before(ds, fromds))
 460                 return (EXDEV);
 461 
 462         drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
 
 490                 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
 491 
 492         if (fromds)
 493                 drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
 494         dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
 495 
 496         if (fromds)
 497                 fromtxg = fromds->ds_phys->ds_creation_txg;
 498 
 499         dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
 500 
 501         dsp->dsa_drr = drr;
 502         dsp->dsa_vp = vp;
 503         dsp->dsa_outfd = outfd;
 504         dsp->dsa_proc = curproc;
 505         dsp->dsa_os = tosnap;
 506         dsp->dsa_off = off;
 507         dsp->dsa_toguid = ds->ds_phys->ds_guid;
 508         ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
 509         dsp->dsa_pending_op = PENDING_NONE;
 510         dsp->sendsize = sendsize;
 511 
 512         mutex_enter(&ds->ds_sendstream_lock);
 513         list_insert_head(&ds->ds_sendstreams, dsp);
 514         mutex_exit(&ds->ds_sendstream_lock);
 515 
 516         if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
 517                 err = dsp->dsa_err;
 518                 goto out;
 519         }
 520 
 521         if (dsp->sendsize) {
 522                 err = traverse_dataset(ds, fromtxg,
 523                     TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
 524                     backup_cb, dsp);
 525         } else {
 526                 err = traverse_dataset(ds,
 527                     fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
 528                     backup_cb, dsp);
 529         }
 530 
 531         if (dsp->dsa_pending_op != PENDING_NONE)
 532                 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
 533                         err = EINTR;
 534 
 535         if (err) {
 536                 if (err == EINTR && dsp->dsa_err)
 537                         err = dsp->dsa_err;
 538                 goto out;
 539         }
 540 
 541         bzero(drr, sizeof (dmu_replay_record_t));
 542         drr->drr_type = DRR_END;
 543         drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
 544         drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 545 
 546         if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
 547                 err = dsp->dsa_err;
 548                 goto out;
 549         }
 
 |